Example No. 1
def train(args, data, bidaf):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    utte_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    span_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    decoder = AttnDecoderRNN(args, data.WORD.vocab.vectors).to(device)

    utte_encoder_optimizer = optim.SGD(utte_encoder.parameters(),
                                       lr=args.learning_rate)
    span_encoder_optimizer = optim.SGD(span_encoder.parameters(),
                                       lr=args.learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.NLLLoss()

    n_iters = 10 * len(data.train.examples)
    print_loss_total = 0
    print_every = 10000
    start = time.time()
    for iter in range(1, n_iters + 1):
        # Cycle through the training examples (10 passes over the data).
        i = (iter - 1) % len(data.train.examples)
        input_tensor = data.train.examples[i].q_word
        target_tensor = data.train.examples[i].ans
        span = data.train.examples[i].span
        loss = train_each(input_tensor, target_tensor, utte_encoder,
                          span_encoder, decoder, utte_encoder_optimizer,
                          span_encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))
Example No. 2
def load_model_state(self, model_file):
    print("Resuming training from a given model...")
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']
    encoder_state_dict = model['encoder_state_dict']
    encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
    decoder_state_dict = model['decoder_state_dict']
    decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
    loss = model['loss']
    encoder = EncoderRNN(self.wm, self.embedding_size,
                         self.hidden_size, self.bidirectional)
    decoder = AttnDecoderRNN("general", self.hidden_size, 10)
    enc_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=self.learning_rate)
    # Restore the saved parameters; without these calls the returned
    # modules would keep their random initialization.
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)
    enc_optimizer.load_state_dict(encoder_optimizer_state_dict)
    dec_optimizer.load_state_dict(decoder_optimizer_state_dict)

    return encoder, decoder, enc_optimizer, dec_optimizer, epoch
Example No. 3
def main():
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    device = torch.device(args.device)
    print('device : {}'.format(device))

    encoder = EncoderRNN(input_lang.n_words, args.hidden_size).to(device)
    decoder = AttnDecoderRNN(args.hidden_size,
                             output_lang.n_words,
                             dropout_p=0.1).to(device)

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=args.lr)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.lr)

    model = Translator(input_lang, output_lang, encoder, decoder,
                       encoder_optimizer, decoder_optimizer)

    trainIters(model, pairs, n_iters=10000, print_every=100, plot_every=100)

    evaluateRandomly(model, pairs)

    output_words, attentions = evaluate(model, "je suis trop froid .")
    plt.matshow(attentions.numpy())
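
In a non-interactive script, matplotlib only renders the matshow figure once it is shown explicitly, so a final call (an addition here, not in the original) is needed:

    plt.show()  # render the attention matrix when running as a script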
Example No. 4
def loadmodel(model_file, wm, hidden_size, bidirectional):
    """
    Loads the trained model, returns the encoder and decoder for inferencing.
    We initialize 'empty models' in which we will load our parameters.
    It is important that the hyperparameters are the same as used for training.

    Keyword arguments:
    model_file - string with the model location
    wm - embedding matrix
    hidden_size - hidden size
    bidirectional - whether we use bidirectional GRU layers
    """
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']
    encoder_state_dict = model['encoder_state_dict']
    encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
    decoder_state_dict = model['decoder_state_dict']
    decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
    loss = model['loss']
    encoder = EncoderRNN(wm, 300, hidden_size, bidirectional)
    decoder = AttnDecoderRNN(hidden_size, 10)
    enc_optimizer = optim.Adam(encoder.parameters(), lr=0.0001)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=0.0001)
    # Load the saved parameters into the freshly initialized modules; without
    # this the returned models would have random weights.
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)
    return encoder, decoder
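
A brief usage sketch (the checkpoint path and hyperparameter values below are placeholders): load the models, then switch them to evaluation mode before inferencing.

encoder, decoder = loadmodel('model_checkpoint.tar', wm,
                             hidden_size=256, bidirectional=True)
encoder.eval()  # disable dropout and other train-only behavior
decoder.eval()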
Example No. 5
def main(args):
    global best_bleu4, epochs_since_improvement, checkpoint, start_epoch, fine_tune_encoder, data_name, word_map

    # Load vocabulary wrapper
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    if args.checkpoint is None:
        decoder = AttnDecoderRNN(attention_dim=args.attention_dim,
                                 embed_dim=args.embed_dim,
                                 decoder_dim=args.decoder_dim,
                                 vocab_size=len(vocab),
                                 dropout=args.dropout)
        decoder_optimizer = torch.optim.Adam(params=filter(
            lambda p: p.requires_grad, decoder.parameters()),
                                             lr=args.decoder_lr)
        encoder = EncoderCNN()
        encoder.fine_tune(args.fine_tune_encoder)
        encoder_optimizer = torch.optim.Adam(
            params=filter(lambda p: p.requires_grad, encoder.parameters()),
            lr=args.encoder_lr) if args.fine_tune_encoder else None
    else:
        checkpoint = torch.load(args.checkpoint)
        args.start_epoch = checkpoint['epoch'] + 1
        args.epochs_since_improvement = checkpoint['epochs_since_improvement']
        best_bleu4 = checkpoint['bleu-4']
        decoder = checkpoint['decoder']
        decoder_optimizer = checkpoint['decoder_optimizer']
        encoder = checkpoint['encoder']
        encoder_optimizer = checkpoint['encoder_optimizer']
        if args.fine_tune_encoder and encoder_optimizer is None:
            encoder.fine_tune(args.fine_tune_encoder)
            encoder_optimizer = torch.optim.Adam(params=filter(
                lambda p: p.requires_grad, encoder.parameters()),
                                                 lr=args.encoder_lr)
    decoder = decoder.to(device)
    encoder = encoder.to(device)

    criterion = nn.CrossEntropyLoss().to(device)

    # Image preprocessing, normalization for the pretrained resnet
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Build data loader
    train_loader = get_loader(args.image_dir,
                              args.caption_path,
                              vocab,
                              transform,
                              args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)

    val_loader = get_loader(args.image_dir_val,
                            args.caption_path_val,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    for epoch in range(args.start_epoch, args.epochs):
        if args.epochs_since_improvement == 20:
            break
        if args.epochs_since_improvement > 0 and args.epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            if args.fine_tune_encoder:
                adjust_learning_rate(encoder_optimizer, 0.8)

        train(train_loader=train_loader,
              encoder=encoder,
              decoder=decoder,
              criterion=criterion,
              encoder_optimizer=encoder_optimizer,
              decoder_optimizer=decoder_optimizer,
              epoch=epoch)

        recent_bleu4 = validate(val_loader=val_loader,
                                encoder=encoder,
                                decoder=decoder,
                                criterion=criterion)

        is_best = recent_bleu4 > best_bleu4
        best_bleu4 = max(recent_bleu4, best_bleu4)
        if not is_best:
            args.epochs_since_improvement += 1
            print("\nEpoch since last improvement: %d\n" %
                  (args.epochs_since_improvement, ))
        else:
            args.epochs_since_improvement = 0

        save_checkpoint(args.data_name, epoch, args.epochs_since_improvement,
                        encoder, decoder, encoder_optimizer, decoder_optimizer,
                        recent_bleu4, is_best)
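
adjust_learning_rate is called above but not defined in this excerpt; a common implementation (an assumption here, not necessarily this example's own) scales every parameter group's learning rate by the given factor:

def adjust_learning_rate(optimizer, shrink_factor):
    # Multiply the learning rate of every parameter group by shrink_factor.
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print("New learning rate: %f" % (optimizer.param_groups[0]['lr'],))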
Example No. 6
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)
    # decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=learning_rate, lr_decay=0, weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))
        # Get data

        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm = rt.cuda(), re.cuda(), rm.cuda()
                summary = summary.cuda()

            # Get the average loss on the sentences
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
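
addpaddings is used above but not shown; a minimal sketch, assuming it right-pads each integer sequence in a batch to the longest length with pad id 0:

def addpaddings(seqs, pad_id=0):
    # Right-pad every sequence to the length of the longest one in the batch.
    max_len = max(len(s) for s in seqs)
    return [s + [pad_id] * (max_len - len(s)) for s in seqs]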
Example No. 7
                         dropout_p=config.DROPOUT)
if config.RESTORE:
    encoder_path = os.path.join(config.MODEL_DIR, "encoder.pth")
    decoder_path = os.path.join(config.MODEL_DIR, "decoder.pth")

    encoder.load_state_dict(torch.load(encoder_path))
    decoder.load_state_dict(torch.load(decoder_path))

# Move models to GPU
if config.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=config.LR)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=config.LR)
criterion = LanguageModelCriterion()  #nn.NLLLoss(ignore_index=0)

# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
print_loss_total = 0
plot_loss_total = 0

for epoch in range(1, config.NUM_ITER + 1):

    # Get training data for this cycle
    input_index, output_index, mask_batch = next(train_dataloader.load())
    input_variable = Variable(torch.LongTensor(input_index))
    output_variable = Variable(torch.LongTensor(output_index))
    mask_variable = Variable(torch.FloatTensor(mask_batch))
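
LanguageModelCriterion is not defined in the excerpt; the name, the mask_variable built above, and the trailing nn.NLLLoss comment suggest a masked NLL loss. A minimal sketch under that assumption:

class LanguageModelCriterion(nn.Module):
    """Masked NLL: mean negative log-likelihood over non-padded positions."""
    def forward(self, log_probs, target, mask):
        # log_probs: (batch, seq_len, vocab) log-softmax outputs
        # target, mask: (batch, seq_len); mask is 1.0 on real tokens, 0.0 on pads
        nll = -log_probs.gather(2, target.unsqueeze(2)).squeeze(2)
        return (nll * mask).sum() / mask.sum()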
Example No. 8
        lang_tuple = pkl.load(f)
    lang = Lang(lang_tuple)

    # Prepare dataloader for training
    train_dataiter = DataIter(train_pairs, lang, args.vocab_size, args.batch_size, args.cuda)

    # Set encoder and decoder
    encoder = Encoder(args.vocab_size, args.hidden_size)
    decoder = AttnDecoderRNN(args.attn, args.hidden_size, args.vocab_size, args.n_layers, args.dropout, args.cuda)
    if args.cuda:
        encoder = encoder.cuda()
        decoder = decoder.cuda()
    
    # Set optimizer and criterion
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    encoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=encoder_optimizer, 
        mode='min', 
        factor=0.1, 
        patience=5, 
        verbose=True,
        min_lr=0.00001)
    decoder_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=decoder_optimizer, 
        mode='min', 
        factor=0.1,
        patience=5, 
        verbose=True,
        min_lr=0.00001)
    criterion = nn.NLLLoss(ignore_index=PAD_token)
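
The ReduceLROnPlateau schedulers above only act when stepped with the monitored metric; the excerpt ends before the training loop, but the usage would be along these lines (val_loss is a hypothetical per-epoch validation loss):

    # After each epoch's validation pass:
    encoder_scheduler.step(val_loss)
    decoder_scheduler.step(val_loss)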
Example No. 9
class EncoderDecoder(object):
    """EncoderDecoder"""
    def __init__(self,
                 hidden_size=128,
                 input_vocab_len=10000,
                 output_vocab_len=10000,
                 dropout_p=0.1,
                 teacher_forcing_ratio=0.5,
                 max_length=10,
                 learning_rate=0.01,
                 simple=False,
                 bidirectional=False,
                 dot=False,
                 multi=False,
                 num_layers=1):
        super(EncoderDecoder, self).__init__()

        self.hidden_size = hidden_size
        self.input_vocab_len = input_vocab_len
        self.output_vocab_len = output_vocab_len
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.learning_rate = learning_rate
        self.simple = simple
        self.dot = dot
        self.bidirectional = bidirectional
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.multi = multi
        self.num_layers = num_layers

        if self.multi:
            self.encoder = code.MultiLayerBidirectionalEncoderRNN(
                input_vocab_len, hidden_size, num_layers=num_layers).to(device)
            self.decoder = code.MultiLayerAttnDecoderRNNDot(
                hidden_size,
                output_vocab_len,
                dropout_p=dropout_p,
                max_length=max_length,
                num_layers=num_layers).to(device)
        else:
            if self.bidirectional:
                self.encoder = code.define_bi_encoder(input_vocab_len,
                                                      hidden_size).to(device)
            else:
                self.encoder = EncoderRNN(input_vocab_len,
                                          hidden_size).to(device)

            if self.simple:
                self.decoder = code.define_simple_decoder(
                    hidden_size,
                    input_vocab_len,
                    output_vocab_len,
                    max_length,
                    num_layers=num_layers).to(device)
            else:
                if not self.dot:
                    self.decoder = AttnDecoderRNN(
                        hidden_size,
                        output_vocab_len,
                        dropout_p=dropout_p,
                        max_length=self.max_length).to(device)
                else:
                    self.decoder = code.AttnDecoderRNNDot(
                        hidden_size,
                        output_vocab_len,
                        dropout_p=dropout_p,
                        max_length=max_length).to(device)

        self.encoder_optimizer = None
        self.decoder_optimizer = None
        self.criterion = None
        self.input_lang = None
        self.output_lang = None

    def indexesFromSentence(self, lang, sentence, char=False):
        if char:
            return [lang.char2index[c] for c in sentence]
        else:
            return [lang.word2index[word] for word in sentence.split(' ')]

    def tensorFromSentence(self, lang, sentence, char=False):
        indexes = self.indexesFromSentence(lang, sentence, char)
        indexes.append(EOS_token)
        return torch.tensor(indexes, dtype=torch.long,
                            device=device).view(-1, 1)

    def tensorsFromPair(self, pair, char=False):
        input_tensor = self.tensorFromSentence(self.input_lang, pair[0], char)
        target_tensor = self.tensorFromSentence(self.output_lang, pair[1],
                                                char)
        return (input_tensor, target_tensor)

    def train(self, input_tensor, target_tensor):
        encoder_hidden = self.encoder.initHidden()

        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        input_length = input_tensor.size(0)
        target_length = target_tensor.size(0)

        encoder_outputs = torch.zeros(self.max_length,
                                      self.hidden_size,
                                      device=device)

        loss = 0

        for ei in range(input_length):
            encoder_output, encoder_hidden = self.encoder(
                input_tensor[ei], encoder_hidden)

            if self.bidirectional:
                encoder_output = code.fix_bi_encoder_output_dim(
                    encoder_output, self.hidden_size)
            if self.multi:
                encoder_output = code.fix_multi_bi_encoder_output_dim(
                    encoder_output, self.hidden_size)

            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)

        if self.bidirectional:
            decoder_hidden = code.fix_bi_encoder_hidden_dim(encoder_hidden)
        elif self.multi:
            decoder_hidden = code.fix_multi_bi_encoder_hidden_dim(
                encoder_hidden)
        else:
            decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < self.teacher_forcing_ratio

        if use_teacher_forcing:
            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                if self.simple:
                    decoder_output, decoder_hidden = code.run_simple_decoder(
                        self.decoder, decoder_input, encoder_hidden,
                        decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden, decoder_attention = self.decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                loss += self.criterion(decoder_output, target_tensor[di])
                decoder_input = target_tensor[di]  # Teacher forcing

        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                if self.simple:
                    decoder_output, decoder_hidden = code.run_simple_decoder(
                        self.decoder, decoder_input, encoder_hidden,
                        decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden, decoder_attention = self.decoder(
                        decoder_input, decoder_hidden, encoder_outputs)

                topv, topi = decoder_output.topk(1)
                # Detach so the model's own prediction is fed back as plain input.
                decoder_input = topi.squeeze().detach()

                loss += self.criterion(decoder_output, target_tensor[di])
                if decoder_input.item() == EOS_token:
                    break

        loss.backward()

        self.encoder_optimizer.step()
        self.decoder_optimizer.step()

        return loss.item() / target_length

    def trainIters(self,
                   pairs,
                   input_lang,
                   output_lang,
                   n_iters,
                   print_every=1000,
                   plot_every=100,
                   char=False):
        start = time.time()
        plot_losses = []
        print_loss_total = 0  # Reset every print_every
        plot_loss_total = 0  # Reset every plot_every

        self.input_lang = input_lang
        self.output_lang = output_lang
        self.encoder_optimizer = optim.SGD(self.encoder.parameters(),
                                           lr=self.learning_rate)
        self.decoder_optimizer = optim.SGD(self.decoder.parameters(),
                                           lr=self.learning_rate)
        selected_pairs = [random.choice(pairs) for i in range(n_iters)]
        training_pairs = [
            self.tensorsFromPair(pair, char) for pair in selected_pairs
        ]
        self.criterion = nn.NLLLoss()

        for iter in range(1, n_iters + 1):
            training_pair = training_pairs[iter - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]
            loss = self.train(input_tensor, target_tensor)
            print_loss_total += loss
            plot_loss_total += loss

            if iter % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print('%s (%d %d%%) %.4f' %
                      (timeSince(start, iter / n_iters), iter,
                       iter / n_iters * 100, print_loss_avg))

            if iter % plot_every == 0:
                plot_loss_avg = plot_loss_total / plot_every
                plot_losses.append(plot_loss_avg)
                plot_loss_total = 0

        showPlot(plot_losses)

    def evaluate(self, sentence, char=False):
        with torch.no_grad():
            input_tensor = self.tensorFromSentence(self.input_lang, sentence,
                                                   char)
            input_length = input_tensor.size()[0]
            encoder_hidden = self.encoder.initHidden()

            encoder_outputs = torch.zeros(self.max_length,
                                          self.encoder.hidden_size,
                                          device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = self.encoder(
                    input_tensor[ei], encoder_hidden)
                if self.bidirectional:
                    encoder_output = code.fix_bi_encoder_output_dim(
                        encoder_output, self.hidden_size)
                if self.multi:
                    encoder_output = code.fix_multi_bi_encoder_output_dim(
                        encoder_output, self.hidden_size)

                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            if self.bidirectional:
                decoder_hidden = code.fix_bi_encoder_hidden_dim(encoder_hidden)
            elif self.multi:
                decoder_hidden = code.fix_multi_bi_encoder_hidden_dim(
                    encoder_hidden)
            else:
                decoder_hidden = encoder_hidden

            decoded_words = []
            if not self.simple:
                decoder_attentions = torch.zeros(self.max_length,
                                                 self.max_length)

            for di in range(self.max_length):
                if self.simple:
                    decoder_output, decoder_hidden = code.run_simple_decoder(
                        self.decoder, decoder_input, encoder_hidden,
                        decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden, decoder_attention = self.decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                    decoder_attentions[di] = decoder_attention.data

                topv, topi = decoder_output.data.topk(1)

                if topi.item() == EOS_token:
                    decoded_words.append('<EOS>')
                    break
                else:
                    if char:
                        decoded_words.append(
                            self.output_lang.index2char[topi.item()])
                    else:
                        decoded_words.append(
                            self.output_lang.index2word[topi.item()])

                decoder_input = topi.squeeze().detach()

        if not self.simple:
            return decoded_words, decoder_attentions[:di + 1]
        else:
            return decoded_words, None

    @classmethod
    def load(cls, directory):
        with open(os.path.join(directory, 'args.pkl'), 'rb') as f:
            params = cloudpickle.load(f)

        model = EncoderDecoder(
            params['hidden_size'],
            params['input_vocab_len'],
            params['output_vocab_len'],
            dropout_p=params['dropout_p'],
            teacher_forcing_ratio=params['teacher_forcing_ratio'],
            max_length=params['max_length'],
            learning_rate=params['learning_rate'],
            simple=params['simple'],
            bidirectional=params['bidirectional'],
            dot=params['dot'],
            multi=params['multi'],
            num_layers=params['num_layers'])

        model.input_lang = params['input_lang']
        model.output_lang = params['output_lang']
        model.encoder.load_state_dict(
            torch.load(os.path.join(directory, 'encoder.pt'),
                       map_location=lambda storage, loc: storage).state_dict())
        model.decoder.load_state_dict(
            torch.load(os.path.join(directory, 'decoder.pt'),
                       map_location=lambda storage, loc: storage).state_dict())

        return model

    def save(self, directory):
        if not os.path.exists(directory):
            os.makedirs(directory)

        def create_save_model(model, path):
            return torch.save(model, path)

        create_save_model(self.encoder, os.path.join(directory, 'encoder.pt'))
        create_save_model(self.decoder, os.path.join(directory, 'decoder.pt'))

        with open(os.path.join(directory, 'args.pkl'), 'wb') as f:
            cloudpickle.dump(
                {
                    'input_lang': self.input_lang,
                    'output_lang': self.output_lang,
                    'dropout_p': self.dropout_p,
                    'teacher_forcing_ratio': self.teacher_forcing_ratio,
                    'max_length': self.max_length,
                    'learning_rate': self.learning_rate,
                    'hidden_size': self.hidden_size,
                    'input_vocab_len': self.input_vocab_len,
                    'output_vocab_len': self.output_vocab_len,
                    'simple': self.simple,
                    'bidirectional': self.bidirectional,
                    'dot': self.dot,
                    'multi': self.multi,
                    'num_layers': self.num_layers
                }, f)

    def evaluatePairs(self, pairs, rand=True, n=10, plot=False, char=False):
        n = n if rand else len(pairs)
        outputs = []
        for i in range(n):
            if rand:
                pair = random.choice(pairs)
            else:
                pair = pairs[i]
            print('>', pair[0])
            print('=', pair[1])
            output_words, attentions = self.evaluate(pair[0], char)
            if plot and not self.simple:
                plt.matshow(attentions.numpy())
            if char:
                output_sentence = ''.join(output_words[:-1])
            else:
                output_sentence = ' '.join(output_words[:-1])
            outputs.append((output_sentence, pair[1]))
            print('<', output_sentence)
            print('')
        return outputs

    def evaluateAndShowAttention(self, input_sentence, char=False):
        output_words, attentions = self.evaluate(
            normalizeString(input_sentence), char)
        print('input =', input_sentence)
        if char:
            print('output =', ''.join(output_words))
        else:
            print('output =', ' '.join(output_words))
        if not self.simple:
            showAttention(normalizeString(input_sentence),
                          output_words,
                          attentions[:, :len(output_words)],
                          char=char)
        else:
            print("Not an attention-based model "
                  "(initialized with simple=True), so there is no attention to show.")
Example No. 10
                                        nlayers, dropout_p)
        else:
            encoder = MetaNetRNN(emb_size, input_size, output_size, nlayers,
                                 dropout_p)
        if use_attention:
            decoder = AttnDecoderRNN(emb_size, output_size, nlayers, dropout_p)
        else:
            decoder = DecoderRNN(emb_size, output_size, nlayers, dropout_p)
        if USE_CUDA:
            encoder = encoder.cuda()
            decoder = decoder.cuda()
        criterion = nn.NLLLoss()
        print('  Set learning rate to ' + str(adam_learning_rate))
        encoder_optimizer = optim.Adam(encoder.parameters(),
                                       lr=adam_learning_rate)
        decoder_optimizer = optim.Adam(decoder.parameters(),
                                       lr=adam_learning_rate)
        print("")
        print("Architecture options...")
        print(" Decoder attention is USED") if use_attention else print(
            " Decoder attention is NOT used")
        print(" External memory is USED") if not disable_memory else print(
            " External memory is NOT used")
        print(" Reconstruction loss is USED"
              ) if not disable_recon_loss else print(
                  " Reconstruction loss is NOT used")
        print("")
        describe_model(encoder)
        describe_model(decoder)

        # create validation episodes
Example No. 11
class Train(object):
    """

    """

    # TODO: refactor so the dataset is not stored in the Train class; add an
    # apply() method that accepts a dataset parameter and trains on it.

    def __init__(self, config, dataset):
        self.config = config
        self.n_epochs = config.n_epochs
        self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
        self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words,
                                      config=config)
        self.encoder_optimizer = config.optimizier(self.encoder.parameters(),
                                                   lr=config.learning_rate)
        self.decoder_optimizer = config.optimizier(self.decoder.parameters(),
                                                   lr=config.learning_rate)
        self.criterion = nn.NLLLoss()
        self.is_plot = config.is_plot
        self.clip_value = config.clip_value
        self.losses = []
        if self.config.USE_CUDA:
            self.encoder.cuda(self.config.gpu_id)
        if self.config.USE_CUDA:
            self.decoder.cuda(device_id=self.config.gpu_id)

    def train(self, dataset):
        if self.is_plot:
            fig, ax = plt.subplots()
            ax.grid(True)
            plt.ion()
        for epoch in range(self.n_epochs):
            training_pair = dataset.get_sample_var()
            loss, result_output = self.step(training_pair)
            print("At Epoch : {:5},Get loss : {:10}\n".format(epoch, loss))
            self.losses.append(loss)
            if self.is_plot:
                ax.plot(range(epoch + 1), self.losses, "b")
                plt.pause(0.0001)
                plt.show()
            if epoch % 100 == 0:
                print(''.join([
                    dataset.target.index2word[i]
                    for i in training_pair[1].squeeze(1).data.tolist()
                ]))
                print(''.join(
                    [dataset.target.index2word[i] for i in result_output]))

    def step(self, training_pair):
        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        input_variable = training_pair[0]
        target_variable = training_pair[1]

        loss = 0
        input_length = input_variable.size()[0]
        target_length = target_variable.size()[0]

        encoder_hidden = self.encoder.init_hidden()
        encoder_outputs, encoder_hidden = self.encoder(input_variable,
                                                       encoder_hidden)
        decoder_input = Variable(torch.LongTensor([[self.config.SOS_token]]))
        decoder_context = Variable(torch.zeros(1, self.decoder.hidden_dim))
        decoder_hidden = encoder_hidden
        if self.config.USE_CUDA:
            decoder_input = decoder_input.cuda(device_id=self.config.gpu_id)
            decoder_context = decoder_context.cuda(
                device_id=self.config.gpu_id)
            assert type(decoder_input.data) == torch.cuda.LongTensor
            assert type(decoder_context.data) == torch.cuda.FloatTensor
        result_output = []
        for di in range(target_length):
            decoder_output, \
            decoder_context, \
            decoder_hidden, \
            decoder_attention = self.decoder(decoder_input,
                                             decoder_context,
                                             decoder_hidden,
                                             encoder_outputs)
            loss += self.criterion(decoder_output[0], target_variable[di])
            topv, topi = decoder_output.data.topk(1)
            ni = topi[0][0]
            decoder_input = Variable(torch.LongTensor([[ni]]))
            if self.config.USE_CUDA:
                decoder_input = decoder_input.cuda(
                    device_id=self.config.gpu_id)
            result_output.append(ni)
            if ni == self.config.EOS_token: break
        loss.backward()
        # TODO : clip value
        torch.nn.utils.clip_grad_norm(self.encoder.parameters(),
                                      self.clip_value)
        torch.nn.utils.clip_grad_norm(self.decoder.parameters(),
                                      self.clip_value)

        self.encoder_optimizer.step()
        self.decoder_optimizer.step()
        if self.config.USE_CUDA:
            return loss.cpu().data[0] / target_length, result_output
        return loss.data[0] / target_length, result_output
Example No. 12
n_layers = 2
dropout_p = 0.05

# Initialize models
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers)
decoder = AttnDecoderRNN(attn_model, hidden_size, output_lang.n_words, n_layers, dropout_p=dropout_p)

# Move models to GPU
if USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# Initialize optimizers and criterion
learning_rate = 0.0001
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss()

# Configuring training
n_epochs = 50000
plot_every = 200
print_every = 1000

# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
print_loss_total = 0 # Reset every print_every
plot_loss_total = 0 # Reset every plot_every

# Begin!
for epoch in range(1, n_epochs + 1):
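    # The excerpt ends at the loop header; a typical body, matching the
    # bookkeeping variables set up above, is sketched here. train() and
    # variables_from_pair() are hypothetical helpers, not from this example.
    input_variable, target_variable = variables_from_pair(random.choice(pairs))
    loss = train(input_variable, target_variable, encoder, decoder,
                 encoder_optimizer, decoder_optimizer, criterion)
    print_loss_total += loss
    plot_loss_total += loss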
Example No. 13
def main(args):

    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224,
                              0.225))
    ])

    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    data_loader = get_loader(args.image_dir, args.caption_path, vocab, transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    encoder = EncoderCNN(args.embed_size)
    decoder = AttnDecoderRNN(args.embed_size, args.hidden_size, len(vocab), args.num_layers)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    criterion = nn.CrossEntropyLoss()
    params = list(decoder.parameters()) + list(encoder.linear.parameters()) + list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(params, lr=args.learning_rate)

    total_step = len(data_loader)
    decoder_hidden = decoder.init_hidden()

    for epoch in range(args.num_epochs):
        for i, (images, captions, lengths) in enumerate(data_loader):

            # Not volatile: gradients must flow through the inputs for training.
            images = cuda_variable(images)
            captions = cuda_variable(captions)
            targets = pack_padded_sequence(captions, lengths, batch_first=True)[0]

            decoder.zero_grad()
            encoder.zero_grad()
            features = encoder(images)

            outputs = decoder(captions, decoder_hidden, features, lengths)
            # outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            if i % args.log_step == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f, Perplexity: %5.4f'
                      %(epoch, args.num_epochs, i, total_step,
                        loss.data[0], np.exp(loss.data[0])))

            if (i+1) % args.save_step == 0:
                torch.save(decoder.state_dict(),
                           os.path.join(args.model_path,
                                        'decoder-%d-%d.pkl' %(epoch+1, i+1)))
                torch.save(encoder.state_dict(),
                           os.path.join(args.model_path,
                                        'encoder-%d-%d.pkl' %(epoch+1, i+1)))
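
cuda_variable is not defined in the excerpt; a sketch of the pre-0.4 PyTorch helper it presumably is:

from torch.autograd import Variable

def cuda_variable(tensor, volatile=False):
    # Wrap a tensor in an autograd Variable, moving it to the GPU if available.
    if torch.cuda.is_available():
        tensor = tensor.cuda()
    return Variable(tensor, volatile=volatile)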
Example No. 14
# Initialize models
embedder = EmbeddingMatrix(Vocab_Size, Embedding_Size)
encoder = EncoderRNN(Embedding_Size, Hidden_Size)
decoder = AttnDecoderRNN(Embedding_Size, Hidden_Size, Vocab_Size, Extend_Vocab_Size, dropout_p=0.3)

embedder = embedder.cuda()
encoder = encoder.cuda()
decoder = decoder.cuda()

embedder_optimizer = optim.Adam(embedder.parameters(), lr=learning_rate)
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate,
                               weight_decay=0.0000001)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate,
                               weight_decay=0.0000001)
criterion = nn.NLLLoss(ignore_index=0).cuda()

# Configure training
n_epochs = 40
plot_every = 2
print_every = 5
start = time.time()
plot_losses = []
print_loss = 0
print_loss_total = 0
plot_loss_total = 0

# Begin
loss_list = []
mydataset = KP20K('dataset', 'small', True)
Example No. 15
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_num_threads(10)

# Split into training and validation sets
train_set, val_set = random_split(
    recipe_step_pairs,
    [TRAIN_SET_SIZE, len(recipe_step_pairs) - TRAIN_SET_SIZE])
print(len(train_set))
print(len(val_set))

encoder = EncoderRNN(n_words, HIDDEN_DIM).to(device)
decoder = AttnDecoderRNN(HIDDEN_DIM, n_words, max_length=MAX_LENGTH).to(device)

encoder_optimizer = optim.SGD(encoder.parameters(), lr=LEARNING_RATE)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=LEARNING_RATE)
loss_function = nn.NLLLoss()

losses_per_epoch = []
for e in range(N_EPOCHS):
    print("---- epoch ", e)
    train_set = list(train_set)
    shuffle(train_set)
    loss = trainIters(encoder,
                      decoder,
                      n_iters=TRAIN_SET_SIZE,
                      max_length=MAX_LENGTH,
                      print_every=REPORT_EVERY)
    losses_per_epoch.append(loss)

torch.save(
Example No. 16
class Seq2Pose():
    def __init__(self, wm, input_length, batch_size, hidden_size,
                 bidirectional, embedding_size, n_parameter, m_parameter,
                 learning_rate, clip, alpha, beta, pre_trained_file=None):
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.bidirectional = bidirectional
        self.n_parameter = n_parameter
        self.m_parameter = m_parameter
        self.learning_rate = learning_rate
        self.wm = wm
        self.clip = clip
        self.alpha = alpha
        self.beta = beta
        if pre_trained_file is None:
            self.encoder = EncoderRNN(self.wm, self.embedding_size,
                                      hidden_size, bidirectional)
            self.decoder = AttnDecoderRNN(self.hidden_size, 10)
            self.enc_optimizer = optim.Adam(self.encoder.parameters(),
                                            lr=self.learning_rate)
            self.dec_optimizer = optim.Adam(self.decoder.parameters(),
                                            lr=self.learning_rate)
            self.start = 0
        else:
            self.resume_training = True
            self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer,\
                self.start = self.load_model_state(pre_trained_file)
        self.decoder = self.decoder.to(device)
        self.encoder = self.encoder.to(device)

    def load_model_state(self, model_file):
        print("Resuming training from a given model...")
        model = torch.load(model_file,
                           map_location=lambda storage, loc: storage)
        epoch = model['epoch']
        encoder_state_dict = model['encoder_state_dict']
        encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
        decoder_state_dict = model['decoder_state_dict']
        decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
        loss = model['loss']
        encoder = EncoderRNN(self.wm, self.embedding_size,
                             self.hidden_size, self.bidirectional)
        decoder = AttnDecoderRNN(self.hidden_size, 10)
        enc_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
        dec_optimizer = optim.Adam(decoder.parameters(), lr=self.learning_rate)
        # Restore the saved parameters into the freshly built modules.
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
        enc_optimizer.load_state_dict(encoder_optimizer_state_dict)
        dec_optimizer.load_state_dict(decoder_optimizer_state_dict)
        return encoder, decoder, enc_optimizer, dec_optimizer, epoch

    def train(self, epochs, x_train, y_train):
        """
        Training loop, trains the network for the given parameters.

        Keyword arguments:
        epochs - number of epochs to train for (looping over the whole dataset)
        x_train - training data, contains a list of integer encoded strings
        y_train - training data, contains a list of pose sequences
        """
        criterion = CustomLoss(self.alpha, self.beta)
        training_set = Dataset(x_train, y_train)
        training_generator = data.DataLoader(training_set,\
            batch_size=self.batch_size, shuffle=True,\
            collate_fn=self.pad_and_sort_batch,\
            num_workers=8, drop_last=True)
        decoder_fixed_previous = Variable(torch.zeros(self.n_parameter,\
            self.batch_size, 10, requires_grad=False)).to(device)
        decoder_fixed_input = torch.FloatTensor\
            ([[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]] *\
                self.batch_size).to(device)

        for epoch in range(self.start, epochs):
            total_loss = 0
            for mini_batches, max_target_length in tqdm(training_generator):
                #kickstart vectors
                self.enc_optimizer.zero_grad()
                self.dec_optimizer.zero_grad()
                loss = 0
                decoder_previous_inputs = decoder_fixed_previous
                for z in range(self.n_parameter):
                    decoder_previous_inputs[z] = decoder_fixed_input
                for i, (x, y, lengths) in enumerate(mini_batches):
                    t1 = time.perf_counter()
                    x = x.to(device)
                    y = y.to(device)
                    decoder_m = np.shape(y)[0]
                    encoder_outputs, encoder_hidden = self.encoder(x, None)
                    decoder_hidden = encoder_hidden[:self.decoder.n_layers]
                    decoder_output = None
                    for n_prev in range(self.n_parameter):
                        decoder_output, decoder_hidden, attn_weights =\
                            self.decoder(decoder_previous_inputs[n_prev].float(),\
                                decoder_hidden, encoder_outputs)
                    decoder_input = decoder_output.float()
                    decoder_previous_generated = Variable(torch.zeros(decoder_m,\
                        self.batch_size, 10, requires_grad=False)).to(device)
                    decoder_outputs_generated = Variable(torch.zeros(decoder_m,\
                        self.batch_size, 10, requires_grad=False)).to(device)
                    for fut_pose in range(decoder_m):
                        decoder_output, decoder_hidden, attn_weights =\
                            self.decoder(decoder_input,decoder_hidden, encoder_outputs)
                        decoder_outputs_generated[fut_pose] = decoder_output
                        decoder_input = y[fut_pose].float()
                    decoder_previous_inputs = decoder_outputs_generated[:-10]
                    # max_length, batch_, item
                    # now mask generated outputs
                    decoder_masked = torch.where(y == 0.0, y.float(),\
                        decoder_outputs_generated.float())
                    decoder_previous_generated[1:] = decoder_masked[:-1]
                    loss += criterion(decoder_masked, decoder_previous_generated,\
                        y.float())
                    total_loss += loss.item()

                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.encoder.parameters(),\
                        self.clip)
                torch.nn.utils.clip_grad_norm_(self.decoder.parameters(),\
                        self.clip)
                self.enc_optimizer.step()
                self.dec_optimizer.step()

            if epoch % 10 == 0:
                self.save_model(self.encoder, self.decoder, self.enc_optimizer,\
                    self.dec_optimizer, epoch, "./models/seq2seq_{}_{}.tar".\
                    format(epoch, total_loss/len(x_train)), total_loss)
            print("Epoch: {} Loss: {}".format(epoch, total_loss))

    def pad_and_sort_batch(self, DataLoaderBatch):
        """
        Pads and sorts the batches, provided as a collate function.

        Keyword arguments:
        DataLoaderBatch - Batch of data coming from dataloader class.
        """
        batch_size = len(DataLoaderBatch)
        batch_split = list(zip(*DataLoaderBatch))

        seqs, targs, lengths, target_lengths = batch_split[0], batch_split[1],\
            batch_split[2], batch_split[3]

        #calculating the size for the minibatches
        max_length = max(lengths)  #longest sequence in X
        max_target_length = max(target_lengths)  #longest sequence in Y
        number_of_chunks = int(max_target_length / self.m_parameter)
        not_in_chunk = max_target_length % self.m_parameter
        words_per_chunk = int(max_length / number_of_chunks)
        not_in_words_per_chunk = max_length % words_per_chunk

        #first zeropad it all
        padded_seqs = np.zeros((batch_size, max_length))
        for i, l in enumerate(lengths):
            padded_seqs[i, 0:l] = seqs[i][0:l]
        new_targets = np.zeros((batch_size, max([len(s) for s in targs]), 10))
        for i, item in enumerate(targs):
            new_targets[i][:len(targs[i])] = targs[i]
        seq_lengths, perm_idx = torch.tensor(lengths).sort(descending=True)
        seq_lengths = list(seq_lengths)
        seq_tensor = padded_seqs[perm_idx]
        target_tensor = new_targets[perm_idx]
        #Full batch is sorted, now we are going to create minibatches.
        #in these batches time comes first, so: [time, batch, features]
        #we also add a vector with lengths, which are necessary for padding
        mini_batches = []  #contains x and y tensor per item
        seq_tensor = np.transpose(seq_tensor, (1, 0))
        target_tensor = np.transpose(target_tensor, (1, 0, 2))
        counter = 0
        for i in range(number_of_chunks):
            x = seq_tensor[i * words_per_chunk:(i + 1) * words_per_chunk]
            y = target_tensor[i * self.m_parameter:(i + 1) * self.m_parameter]
            counter += words_per_chunk * i
            x_mini_batch_lengths = []
            for j in range(batch_size):
                if seq_lengths[j] > counter and seq_lengths[
                        j] < counter + words_per_chunk:
                    x_mini_batch_lengths.append(seq_lengths[j].item() -
                                                counter)
                elif seq_lengths[j] > counter + words_per_chunk:
                    x_mini_batch_lengths.append(words_per_chunk)
                else:
                    x_mini_batch_lengths.append(0)
            mini_batches.append([
                torch.tensor(x).long(),
                torch.tensor(y), x_mini_batch_lengths
            ])
        if not_in_chunk != 0:
            x = seq_tensor[number_of_chunks * words_per_chunk:]
            y = target_tensor[number_of_chunks * self.m_parameter:]
            x_mini_batch_lengths = []
            counter = number_of_chunks * words_per_chunk
            for j in range(batch_size):
                if seq_lengths[j] > counter and seq_lengths[
                        j] < counter + words_per_chunk:
                    x_mini_batch_lengths.append(seq_lengths[j].item() -
                                                counter)
                elif seq_lengths[j] > counter + words_per_chunk:
                    x_mini_batch_lengths.append(words_per_chunk)
                else:
                    x_mini_batch_lengths.append(0)
            if len(x) > 0 and len(y) > 0:
                mini_batches.append([
                    torch.tensor(x).long(),
                    torch.tensor(y), x_mini_batch_lengths
                ])
        return mini_batches, max_target_length


    def save_model(self, encoder, decoder, enc_optimizer, dec_optimizer,\
        epoch, PATH, loss):
        torch.save(
            {
                'epoch': epoch,
                'encoder_state_dict': encoder.state_dict(),
                'encoder_optimizer_state_dict': enc_optimizer.state_dict(),
                'decoder_state_dict': decoder.state_dict(),
                'decoder_optimizer_state_dict': dec_optimizer.state_dict(),
                'loss': loss,
            }, PATH)
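
A short usage sketch for resuming training with this class (every value below, including the checkpoint path and the wm, x_train, and y_train variables, is a placeholder):

s2p = Seq2Pose(wm, input_length=100, batch_size=32, hidden_size=256,
               bidirectional=True, embedding_size=300, n_parameter=10,
               m_parameter=10, learning_rate=1e-4, clip=5.0, alpha=0.5,
               beta=0.5, pre_trained_file='./models/seq2seq_10_0.5.tar')
s2p.train(epochs=100, x_train=x_train, y_train=y_train)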