Example #1
def main():
    # Load the vocabulary and the dataset splits
    voc = Lang('data/WORDMAP.json')
    print("Vocabulary size: " + str(voc.n_words))
    train_data = SaDataset('train', voc)
    val_data = SaDataset('valid', voc)

    # 初始化模型
    encoder = EncoderRNN(voc.n_words, hidden_size, encoder_n_layers, dropout)
    # Move the model to the chosen device: GPU memory when device is cuda, host RAM when it is cpu
    encoder = encoder.to(device)

    # Initialize the optimizer. It performs gradient descent by adjusting the model's
    # parameters; optim is PyTorch's optimization package, and Adam is one such algorithm.
    print('Building optimizers ...')
    '''
    Arguments: the parameters to optimize
    and the learning rate
    '''
    optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    # Best validation accuracy so far
    best_acc = 0
    epochs_since_improvement = 0

    # epochs: total number of training epochs
    for epoch in range(0, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(optimizer, 0.8)

        # Train for one epoch
        train(epoch, train_data, encoder, optimizer)

        # Evaluate on the validation set to guard against overfitting
        val_acc, val_loss = valid(val_data, encoder)
        print('\n * ACCURACY - {acc:.3f}, LOSS - {loss:.3f}\n'.format(acc=val_acc, loss=val_loss))

        # Check whether validation accuracy improved
        is_best = val_acc > best_acc
        best_acc = max(best_acc, val_acc)

        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, encoder, optimizer, val_acc, is_best)

        # Reshuffle the training and validation samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
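Example #1 calls an adjust_learning_rate helper that is not shown. A minimal
sketch, assuming it simply scales every parameter group's learning rate by the
given factor:

def adjust_learning_rate(optimizer, shrink_factor):
    # Multiply each param group's lr by shrink_factor (e.g. 0.8 above).
    for param_group in optimizer.param_groups:
        param_group['lr'] = param_group['lr'] * shrink_factor
    print("New learning rate: %f" % (optimizer.param_groups[0]['lr'],))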
Example #2
File: train.py Project: Ierezell/ExamAi
def init():
    print("\tInitialising sentences")

    print("\t\tLoading and cleaning json files")
    json_of_convs = load_all_json_conv('./Dataset/messages')

    print("\t\tLoading two person convs")
    duo_conversations = get_chat_friend_and_me(json_of_convs)

    print("\t\tMaking two person convs discussions")
    discussions = get_discussions(duo_conversations)

    print("\t\tCreating pairs for training")
    pairs_of_sentences = make_pairs(discussions)
    print(f"\t\t{len(pairs_of_sentences)} different pairs")

    print("\t\tCreating Vocabulary")
    voc = Voc()

    print("\t\tPopulating Vocabulary")
    voc.createVocFromPairs(pairs_of_sentences)
    print(f"\t\tVocabulary of : {voc.num_words} differents words")

    print('\tBuilding encoder and decoder ...')
    embedding = nn.Embedding(voc.num_words, HIDDEN_SIZE)
    encoder = EncoderRNN(HIDDEN_SIZE, embedding, ENCODER_N_LAYERS, DROPOUT)
    decoder = LuongAttnDecoderRNN(ATTN_MODEL, embedding, HIDDEN_SIZE,
                                  voc.num_words, DECODER_N_LAYERS, DROPOUT)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=LEARNING_RATE * DECODER_LEARNING_RATIO)
    checkpoint = None
    if LOADFILENAME:
        print("\t\tLoading last training")
        checkpoint = torch.load(LOADFILENAME)
        # If loading a model trained on GPU to CPU
        # checkpoint=torch.load(loadFilename,map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']
        print("\t\tPopulating from last training")
        embedding.load_state_dict(embedding_sd)
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    encoder = encoder.to(DEVICE)
    decoder = decoder.to(DEVICE)
    return (encoder, decoder, encoder_optimizer, decoder_optimizer, embedding,
            voc, pairs_of_sentences, checkpoint)
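The checkpoint this init() reads back is not written here. A sketch of a
compatible save call, using the same keys the loader expects (the target path
is an assumption):

torch.save({
    'en': encoder.state_dict(),
    'de': decoder.state_dict(),
    'en_opt': encoder_optimizer.state_dict(),
    'de_opt': decoder_optimizer.state_dict(),
    'embedding': embedding.state_dict(),
    'voc_dict': voc.__dict__,
}, LOADFILENAME)  # keys mirror the checkpoint[...] reads above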
Example #3
def main(opts):
    # set manual_seed and build vocab
    print(opts, flush=True)

    setup(opts, opts.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"Usando {device} :)")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(opts.remove_punctuation == 1, opts.reversed == 1, vocab=vocab, encoding_length=opts.max_cap_length)

    # create language instruction encoder
    encoder_kwargs = {
        'opts': opts,
        'vocab_size': len(vocab),
        'embedding_size': opts.word_embedding_size,
        'hidden_size': opts.rnn_hidden_size,
        'padding_idx': padding_idx,
        'dropout_ratio': opts.rnn_dropout,
        'bidirectional': opts.bidirectional == 1,
        'num_layers': opts.rnn_num_layers
    }
    print('Using {} as encoder ...'.format(opts.lang_embed))
    if 'lstm' in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError('Unknown {} language embedding'.format(opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        'opts': opts,
        'img_fc_dim': opts.img_fc_dim,
        'img_fc_use_batchnorm': opts.img_fc_use_batchnorm == 1,
        'img_dropout': opts.img_dropout,
        'img_feat_input_dim': opts.img_feat_input_dim,
        'rnn_hidden_size': opts.rnn_hidden_size,
        'rnn_dropout': opts.rnn_dropout,
        'max_len': opts.max_cap_length,
        'max_navigable': opts.max_navigable
    }

    if opts.arch == 'regretful':
        model = Regretful(**policy_model_kwargs)
    elif opts.arch == 'self-monitoring':
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == 'speaker-baseline':
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError('Unknown {} model for seq2seq agent'.format(opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)

    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(opts, model, encoder, optimizer)

    # if a secondary exp name is specified; this is useful when resuming from a previously
    # saved experiment and saving to another one, e.g., pre-training on synthetic data and fine-tuning on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir, opts.blind)

    if opts.test_submission:
        assert opts.resume, 'The model was not resumed before running for submission.'
        test_env = ('test', (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                 splits=['test'], tokenizer=tok), Evaluation(['test'], opts)))
        agent_kwargs = {
            'opts': opts,
            'env': test_env[1][0],
            'results_path': "",
            'encoder': encoder,
            'model': model,
            'feedback': opts.feedback
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['train'], tokenizer=tok)
    else:
        train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                 splits=['synthetic'], tokenizer=tok)

    val_craft_splits = ['craft_seen', 'craft_unseen']
    val_splits = ['val_seen', 'val_unseen']
    if opts.craft_eval:
        val_splits += val_craft_splits
    val_envs = {split: (R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size,
                                     splits=[split], tokenizer=tok), Evaluation([split], opts))
                for split in val_splits}
    # create agent
    agent_kwargs = {
        'opts': opts,
        'env': train_env,
        'results_path': "",
        'encoder': encoder,
        'model': model,
        'feedback': opts.feedback
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer, opts.train_iters_epoch)

    if opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)
    sys.stdout.flush()
    best_success_rate = best_success_rate if opts.resume else 0.0
    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))

            success_rate_compare = success_rate[1]
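            # success_rate is collected per split in val_envs insertion order,
            # so index 1 is the 'val_unseen' entry (dicts keep insertion order in 3.7+)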

            if is_experiment():
                # remember best val_unseen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare, best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(best_success_rate))
                sys.stdout.flush()

                # save the model if it is the best so far
                save_checkpoint({
                    'opts': opts,
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'encoder_state_dict': encoder.state_dict(),
                    'best_success_rate': best_success_rate,
                    'optimizer': optimizer.state_dict(),
                    'max_episode_len': opts.max_episode_len,
                }, is_best, checkpoint_dir=opts.checkpoint_dir, name=opts.exp_name)

        if opts.train_data_augmentation and epoch == opts.epochs_data_augmentation:
            train_env = R2RPanoBatch(opts, feature, img_spec, batch_size=opts.batch_size, seed=opts.seed,
                                     splits=['train'], tokenizer=tok)

    print("--> Finished training")
Example #4
with open("word_index_dict", "rb") as f:
    word_index_dict = pickle.load(f)

with open("index_word_dict", "rb") as f:
    index_word_dict = pickle.load(f)

maxlen_q, maxlen_a = 19, 19
# build the model now
encoder = EncoderRNN(len(word_index_dict) + 1, 1024, 1024)  #.cuda()
decoder = DecoderRNN(1024, 1024, len(index_word_dict) + 2)  #.cuda()
attention = Attention_layer(maxlen_q + 1)  #.cuda()
encoder.eval()
decoder.eval()
attention.eval()
params_encoder, params_decoder, params_attention = \
    list(encoder.parameters()), list(decoder.parameters()), list(attention.parameters())

# load weights into model
with open("weights/encoder", "rb") as f:
    weights_encoder = pickle.load(f)

with open("weights/decoder", "rb") as f:
    weights_decoder = pickle.load(f)

with open("weights/attention", "rb") as f:
    weights_attention = pickle.load(f)

for i in range(len(params_encoder)):
    params_encoder[i].data = weights_encoder[i].data.cpu()

for i in range(len(params_decoder)):
    params_decoder[i].data = weights_decoder[i].data.cpu()

for i in range(len(params_attention)):
    params_attention[i].data = weights_attention[i].data.cpu()
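Copying raw parameter lists like this is brittle (it depends on parameter
order). If the weights had instead been saved as state dicts, the idiomatic
equivalent would be (paths hypothetical):

encoder.load_state_dict(torch.load("weights/encoder.pt", map_location="cpu"))
decoder.load_state_dict(torch.load("weights/decoder.pt", map_location="cpu"))
attention.load_state_dict(torch.load("weights/attention.pt", map_location="cpu"))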
Example #5
class VSRN(object):
    """
    rkiros/uvs model
    """

    def __init__(self, opt):
        # tutorials/09 - Image Captioning
        # Build Models
        self.grad_clip = opt.grad_clip
        self.img_enc = EncoderImage(opt.data_name, opt.img_dim, opt.embed_size,
                                    opt.finetune, opt.cnn_type,
                                    use_abs=opt.use_abs,
                                    no_imgnorm=opt.no_imgnorm)
        self.txt_enc = EncoderText(opt.vocab_size, opt.word_dim,
                                   opt.embed_size, opt.num_layers,
                                   use_abs=opt.use_abs)
        if torch.cuda.is_available():
            self.img_enc.cuda()
            self.txt_enc.cuda()
            cudnn.benchmark = True

        # Captioning elements
        self.encoder = EncoderRNN(
            opt.dim_vid,
            opt.dim_hidden,
            bidirectional=opt.bidirectional,
            input_dropout_p=opt.input_dropout_p,
            rnn_cell=opt.rnn_type,
            rnn_dropout_p=opt.rnn_dropout_p)
        self.decoder = DecoderRNN(
            opt.vocab_size,
            opt.max_len,
            opt.dim_hidden,
            opt.dim_word,
            input_dropout_p=opt.input_dropout_p,
            rnn_cell=opt.rnn_type,
            rnn_dropout_p=opt.rnn_dropout_p,
            bidirectional=opt.bidirectional)
        
        self.caption_model = S2VTAttModel(self.encoder, self.decoder)
        
        self.crit = utils.LanguageModelCriterion()
        self.rl_crit = utils.RewardCriterion()

        if torch.cuda.is_available():
            self.caption_model.cuda()

        # Loss and Optimizer
        self.criterion = ContrastiveLoss(margin=opt.margin,
                                         measure=opt.measure,
                                         max_violation=opt.max_violation)
        params = list(self.txt_enc.parameters())
        params += list(self.img_enc.parameters())
        params += list(self.decoder.parameters())
        params += list(self.encoder.parameters())
        params += list(self.caption_model.parameters())

        if opt.finetune:
            params += list(self.img_enc.cnn.parameters())
        self.params = params

        self.optimizer = torch.optim.Adam(params, lr=opt.learning_rate)

        self.Eiters = 0

    def calculate_caption_loss(self, fc_feats, labels, masks):

        # labels = Variable(labels, volatile=False)
        # masks = Variable(masks, volatile=False)

        torch.cuda.synchronize()
        labels = labels.cuda()
        masks = masks.cuda()

        # if torch.cuda.is_available():
        #     labels.cuda()
        #     masks.cuda()

        seq_probs, _ = self.caption_model(fc_feats, labels, 'train')
        loss = self.crit(seq_probs, labels[:, 1:], masks[:, 1:])

        return loss

    def state_dict(self):
        state_dict = [self.img_enc.state_dict(), self.txt_enc.state_dict()]
        return state_dict

    def load_state_dict(self, state_dict):
        self.img_enc.load_state_dict(state_dict[0])
        self.txt_enc.load_state_dict(state_dict[1])

    def train_start(self):
        """switch to train mode
        """
        self.img_enc.train()
        self.txt_enc.train()

    def val_start(self):
        """switch to evaluate mode
        """
        self.img_enc.eval()
        self.txt_enc.eval()

    def forward_emb(self, images, captions, lengths, volatile=False):
        """Compute the image and caption embeddings
        """
        # Set mini-batch dataset
        #images = Variable(images, volatile=volatile)
        #captions = Variable(captions, volatile=volatile)
        images = Variable(images)
        captions = Variable(captions)
        if torch.cuda.is_available():
            images = images.cuda()
            captions = captions.cuda()

        # Forward

        cap_emb = self.txt_enc(captions, lengths)
        img_emb, GCN_img_emd = self.img_enc(images)
        return img_emb, cap_emb, GCN_img_emd

    def forward_loss(self, img_emb, cap_emb, **kwargs):
        """Compute the loss given pairs of image and caption embeddings
        """
        loss = self.criterion(img_emb, cap_emb)
        # self.logger.update('Le', loss.data[0], img_emb.size(0))
        self.logger.update('Le_retrieval', loss.data[0], img_emb.size(0))
        return loss

    def train_emb(self, images, captions, lengths, ids, caption_labels, caption_masks, *args):
        """One training step given images and captions.
        """
        self.Eiters += 1
        self.logger.update('Eit', self.Eiters)
        self.logger.update('lr', self.optimizer.param_groups[0]['lr'])

        # compute the embeddings
        img_emb, cap_emb, GCN_img_emd = self.forward_emb(images, captions, lengths)

        # calculate captioning loss
        self.optimizer.zero_grad()

        caption_loss = self.calculate_caption_loss(GCN_img_emd, caption_labels, caption_masks)

        # measure accuracy and record loss
        self.optimizer.zero_grad()
        retrieval_loss = self.forward_loss(img_emb, cap_emb)

        loss = retrieval_loss + caption_loss

        self.logger.update('Le_caption', caption_loss.data[0], img_emb.size(0))
        self.logger.update('Le', loss.data[0], img_emb.size(0))

        # compute gradient and do SGD step
        loss.backward()
        if self.grad_clip > 0:
            clip_grad_norm(self.params, self.grad_clip)
        self.optimizer.step()
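ContrastiveLoss is constructed above but not defined. A minimal sketch of a
hinge-based image-caption contrastive loss of that kind, keeping only the
margin option (the measure and max_violation options are omitted here):

import torch
import torch.nn as nn

class ContrastiveLossSketch(nn.Module):
    def __init__(self, margin=0.2):
        super(ContrastiveLossSketch, self).__init__()
        self.margin = margin

    def forward(self, img_emb, cap_emb):
        scores = img_emb.mm(cap_emb.t())        # pairwise similarity matrix
        diagonal = scores.diag().view(-1, 1)    # matched-pair scores
        cost_cap = (self.margin + scores - diagonal).clamp(min=0)      # hinge per caption
        cost_img = (self.margin + scores - diagonal.t()).clamp(min=0)  # hinge per image
        mask = torch.eye(scores.size(0), device=scores.device).bool()
        return cost_cap.masked_fill(mask, 0).sum() + cost_img.masked_fill(mask, 0).sum()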
Example #6
def main(opts):

    # set manual_seed and build vocab
    setup(opts, opts.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # create a batch training environment that will also preprocess text
    vocab = read_vocab(opts.train_vocab)
    tok = Tokenizer(
        opts.remove_punctuation == 1,
        opts.reversed == 1,
        vocab=vocab,
        encoding_length=opts.max_cap_length,
    )

    # create language instruction encoder
    encoder_kwargs = {
        "opts": opts,
        "vocab_size": len(vocab),
        "embedding_size": opts.word_embedding_size,
        "hidden_size": opts.rnn_hidden_size,
        "padding_idx": padding_idx,
        "dropout_ratio": opts.rnn_dropout,
        "bidirectional": opts.bidirectional == 1,
        "num_layers": opts.rnn_num_layers,
    }
    print("Using {} as encoder ...".format(opts.lang_embed))
    if "lstm" in opts.lang_embed:
        encoder = EncoderRNN(**encoder_kwargs)
    else:
        raise ValueError("Unknown {} language embedding".format(
            opts.lang_embed))
    print(encoder)

    # create policy model
    policy_model_kwargs = {
        "opts": opts,
        "img_fc_dim": opts.img_fc_dim,
        "img_fc_use_batchnorm": opts.img_fc_use_batchnorm == 1,
        "img_dropout": opts.img_dropout,
        "img_feat_input_dim": opts.img_feat_input_dim,
        "rnn_hidden_size": opts.rnn_hidden_size,
        "rnn_dropout": opts.rnn_dropout,
        "max_len": opts.max_cap_length,
        "max_navigable": opts.max_navigable,
    }

    if opts.arch == "self-monitoring":
        model = SelfMonitoring(**policy_model_kwargs)
    elif opts.arch == "speaker-baseline":
        model = SpeakerFollowerBaseline(**policy_model_kwargs)
    else:
        raise ValueError("Unknown {} model for seq2seq agent".format(
            opts.arch))
    print(model)

    encoder = encoder.to(device)
    model = model.to(device)

    params = list(encoder.parameters()) + list(model.parameters())
    optimizer = torch.optim.Adam(params, lr=opts.learning_rate)

    # optionally resume from a checkpoint
    if opts.resume:
        model, encoder, optimizer, best_success_rate = resume_training(
            opts, model, encoder, optimizer)

    # if a secondary exp name is specified; this is useful when resuming from a previously
    # saved experiment and saving to another one, e.g., pre-training on synthetic data and fine-tuning on real data
    if opts.exp_name_secondary:
        opts.exp_name += opts.exp_name_secondary

    feature, img_spec = load_features(opts.img_feat_dir)

    if opts.test_submission:
        assert (opts.resume
                ), "The model was not resumed before running for submission."
        test_env = (
            "test",
            (
                R2RPanoBatch(
                    opts,
                    feature,
                    img_spec,
                    batch_size=opts.batch_size,
                    splits=["test"],
                    tokenizer=tok,
                ),
                Evaluation(["test"]),
            ),
        )
        agent_kwargs = {
            "opts": opts,
            "env": test_env[1][0],
            "results_path": "",
            "encoder": encoder,
            "model": model,
            "feedback": opts.feedback,
        }
        agent = PanoSeq2SeqAgent(**agent_kwargs)
        # setup trainer
        trainer = PanoSeq2SeqTrainer(opts, agent, optimizer)
        epoch = opts.start_epoch - 1
        trainer.eval(epoch, test_env)
        return

    # set up R2R environments
    if not opts.train_data_augmentation:
        train_env = R2RPanoBatch(
            opts,
            feature,
            img_spec,
            batch_size=opts.batch_size,
            seed=opts.seed,
            splits=["train"],
            tokenizer=tok,
        )
    else:
        train_env = R2RPanoBatch(
            opts,
            feature,
            img_spec,
            batch_size=opts.batch_size,
            seed=opts.seed,
            splits=["synthetic"],
            tokenizer=tok,
        )

    val_envs = {
        split: (
            R2RPanoBatch(
                opts,
                feature,
                img_spec,
                batch_size=opts.batch_size,
                splits=[split],
                tokenizer=tok,
            ),
            Evaluation([split]),
        )
        for split in ["val_seen", "val_unseen"]
    }

    # create agent
    agent_kwargs = {
        "opts": opts,
        "env": train_env,
        "results_path": "",
        "encoder": encoder,
        "model": model,
        "feedback": opts.feedback,
    }
    agent = PanoSeq2SeqAgent(**agent_kwargs)

    # setup trainer
    trainer = PanoSeq2SeqTrainer(opts, agent, optimizer,
                                 opts.train_iters_epoch)

    if opts.eval_beam or opts.eval_only:
        success_rate = []
        for val_env in val_envs.items():
            success_rate.append(
                trainer.eval(opts.start_epoch - 1, val_env, tb_logger=None))
        return

    # set up tensorboard logger
    tb_logger = set_tb_logger(opts.log_dir, opts.exp_name, opts.resume)

    best_success_rate = best_success_rate if opts.resume else 0.0

    for epoch in range(opts.start_epoch, opts.max_num_epochs + 1):
        trainer.train(epoch, train_env, tb_logger)

        if epoch % opts.eval_every_epochs == 0:
            success_rate = []
            for val_env in val_envs.items():
                success_rate.append(trainer.eval(epoch, val_env, tb_logger))

            success_rate_compare = success_rate[1]
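            # success_rate is collected per split in val_envs insertion order,
            # so index 1 is the 'val_unseen' entry (dicts keep insertion order in 3.7+)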

            if is_experiment():
                # remember best val_unseen success rate and save checkpoint
                is_best = success_rate_compare >= best_success_rate
                best_success_rate = max(success_rate_compare,
                                        best_success_rate)
                print("--> Highest val_unseen success rate: {}".format(
                    best_success_rate))

                # save the model if it is the best so far
                save_checkpoint(
                    {
                        "opts": opts,
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "encoder_state_dict": encoder.state_dict(),
                        "best_success_rate": best_success_rate,
                        "optimizer": optimizer.state_dict(),
                        "max_episode_len": opts.max_episode_len,
                    },
                    is_best,
                    checkpoint_dir=opts.checkpoint_dir,
                    name=opts.exp_name,
                )

        if (opts.train_data_augmentation
                and epoch == opts.epochs_data_augmentation):
            train_env = R2RPanoBatch(
                opts,
                feature,
                img_spec,
                batch_size=opts.batch_size,
                seed=opts.seed,
                splits=["train"],
                tokenizer=tok,
            )

    print("--> Finished training")
Example #7
def main():
    train_loader = ChatbotDataset('train')
    val_loader = ChatbotDataset('valid')

    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)

    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # Initializations
    print('Initializing ...')
    batch_time = AverageMeter()  # forward prop. + back prop. time
    losses = AverageMeter()  # loss (per word decoded)

    # Epochs
    for epoch in range(start_epoch, epochs):
        # One epoch's training
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()

        start = time.time()

        # Batches
        for i in range(len(train_loader)):
            input_variable, lengths, target_variable, mask, max_target_len = train_loader[i]
            loss = train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder,
                         encoder_optimizer, decoder_optimizer)

            # Keep track of metrics
            losses.update(loss, max_target_len)
            batch_time.update(time.time() - start)

            start = time.time()

            if i % print_every == 0:
                print('[{0}] Epoch: [{1}][{2}/{3}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(timestamp(), epoch, i, len(train_loader),
                                                                      batch_time=batch_time,
                                                                      loss=losses))
        # One epoch's validation
        val_loss = validate(val_loader, encoder, decoder)
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))

        # Initialize search module
        searcher = GreedySearchDecoder(encoder, decoder)
        for sentence in pick_n_valid_sentences(10):
            decoded_words = evaluate(searcher, sentence)
            print('Human: {}'.format(sentence))
            print('Bot: {}'.format(''.join(decoded_words)))

        # Save checkpoint
        if epoch % save_every == 0:
            directory = save_dir
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'epoch': epoch,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc': voc.__dict__
            }, os.path.join(directory, '{}_{}_{}.tar'.format('checkpoint', epoch, val_loss)))
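AverageMeter is used above without being shown. A sketch of the usual helper,
matching the .val/.avg/.update() interface:

class AverageMeter(object):
    # Tracks the most recent value and a running average of a scalar metric.
    def __init__(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count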
Example #8
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '--hidden_size',
        default=256,
        type=int,
        help='hidden size of encoder/decoder, also word vector size')
    ap.add_argument('--edge_size',
                    default=20,
                    type=int,
                    help='embedding dimension of edges')
    ap.add_argument('--n_iters',
                    default=100000,
                    type=int,
                    help='total number of examples to train on')
    ap.add_argument('--print_every',
                    default=5000,
                    type=int,
                    help='print loss info every this many training examples')
    ap.add_argument(
        '--checkpoint_every',
        default=10000,
        type=int,
        help='write out checkpoint every this many training examples')
    ap.add_argument('--initial_learning_rate',
                    default=0.001,
                    type=float,
                    help='initial learning rate')
    ap.add_argument('--train_files',
                    default='../amr_anno_1.0/data/split/training/*',
                    help='training files.')
    ap.add_argument('--log_dir', default='./log', help='log directory')
    ap.add_argument('--exp_name', default='experiment', help='experiment name')
    ap.add_argument('--batch_size', default=5, type=int, help='batch size')
    ap.add_argument('--load_checkpoint',
                    action='store_true',
                    help='use existing checkpoint')

    args = ap.parse_args()

    logdir = args.log_dir
    exp_dir = logdir + '/' + args.exp_name
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    load_state_file = None
    if args.load_checkpoint:
        max_iter = 0
        state_files = glob.glob(exp_dir + '/*')
        for sf in state_files:
            iter_num = int(sf.split('_')[1].split('.')[0])
            if iter_num > max_iter:
                max_iter = iter_num
                load_state_file = sf
    # Create vocab from training data
    iter_num = 0
    train_files = glob.glob(args.train_files)
    train_pairs = AMR.read_AMR_files(train_files, True)
    amr_vocab, en_vocab = None, None
    state = None
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    edge_size = args.edge_size
    drop = DROPOUT_P
    mlength = MAX_LENGTH
    if load_state_file is not None:
        state = torch.load(load_state_file)
        iter_num = state['iter_num']
        amr_vocab = state['amr_vocab']
        en_vocab = state['en_vocab']
        hidden_size = state['hidden_size']
        edge_size = state['edge_size']
        drop = state['dropout']
        mlength = state['max_length']
        logging.info('loaded checkpoint %s', load_state_file)
    else:
        amr_vocab, en_vocab = make_vocabs(train_pairs)
    encoder = EncoderRNN(amr_vocab.n_nodes, hidden_size).to(device)
    child_sum = ChildSum(amr_vocab.n_edges, edge_size, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size,
                             en_vocab.n_words,
                             dropout_p=drop,
                             max_length=mlength).to(device)

    #load checkpoint
    if state is not None:
        encoder.load_state_dict(state['enc_state'])
        child_sum.load_state_dict(state['sum_state'])
        decoder.load_state_dict(state['dec_state'])

    # set up optimization/loss
    params = list(encoder.parameters()) + list(child_sum.parameters()) + list(
        decoder.parameters())  # .parameters() returns generator
    optimizer = optim.Adam(params, lr=args.initial_learning_rate)
    criterion = nn.NLLLoss()

    #load checkpoint
    if state is not None:
        optimizer.load_state_dict(state['opt_state'])

    start = time.time()
    print_loss_total = 0  # Reset every args.print_every

    while iter_num < args.n_iters:
        num_samples = batch_size
        remaining = args.checkpoint_every - (iter_num % args.checkpoint_every)
        remaining2 = args.print_every - (iter_num % args.print_every)
        if remaining < batch_size:
            num_samples = remaining
        elif remaining2 < batch_size:
            num_samples = remaining2
        iter_num += num_samples
        random_pairs = random.sample(train_pairs, num_samples)
        target_snt = tensors_from_batch(en_vocab, random_pairs)
        loss = train(random_pairs, target_snt, amr_vocab, encoder, child_sum,
                     decoder, optimizer, criterion)
        print_loss_total += loss

        if iter_num % args.checkpoint_every == 0:
            state = {
                'iter_num': iter_num,
                'enc_state': encoder.state_dict(),
                'sum_state': child_sum.state_dict(),
                'dec_state': decoder.state_dict(),
                'opt_state': optimizer.state_dict(),
                'amr_vocab': amr_vocab,
                'en_vocab': en_vocab,
                'hidden_size': hidden_size,
                'edge_size': edge_size,
                'dropout': drop,
                'max_length': mlength
            }
            filename = 'state_%010d.pt' % iter_num
            save_file = exp_dir + '/' + filename
            torch.save(state, save_file)
            logging.debug('wrote checkpoint to %s', save_file)

        if iter_num % args.print_every == 0:
            print_loss_avg = print_loss_total / args.print_every
            print_loss_total = 0
            logging.info(
                'time since start:%s (iter:%d iter/n_iters:%d%%) loss_avg:%.4f',
                time.time() - start, iter_num, iter_num / args.n_iters * 100,
                print_loss_avg)
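For the logging.info and logging.debug calls in this example to produce output,
some logging configuration is assumed elsewhere, e.g.:

import logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(message)s')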
Example #9
        args.hidden_size,
        args.n_layers,
        args.dropout,
    )
    decoder = LuongAttnDecoderRNN(
        args.attn_model,
        args.hidden_size,
        len(dataset.out_vocab[0]),
        args.n_layers,
        args.dropout,
    )

    # Initialize optimizers and criterion
    # encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate)
    # decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate * decoder_learning_ratio)
    encoder_optimizer = optim.Adadelta(encoder.parameters())
    decoder_optimizer = optim.Adadelta(decoder.parameters())
    criterion = nn.CrossEntropyLoss()

    # Move models to GPU
    if args.USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    # train(dataset,
    #      args.batch_size,
    #      args.n_epochs,
    #      encoder,
    #      decoder,
    #      encoder_optimizer,
    #      decoder_optimizer,
Example #10
            decoder_input = topi.squeeze().detach()

            if decoder_input.item() == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / output_len

hidden_size = 256
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder = AttenDecoderRNN(hidden_size, output_lang.n_words, max_len=MAX_LENGTH, dropout_p=0.1).to(device)

lr = 0.01
encoder_optimizer = optim.SGD(encoder.parameters(), lr=lr)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=lr)

scheduler_encoder = torch.optim.lr_scheduler.StepLR(encoder_optimizer, step_size=1, gamma=0.95)
scheduler_decoder = torch.optim.lr_scheduler.StepLR(decoder_optimizer, step_size=1, gamma=0.95)

criterion = nn.NLLLoss()

n_iters = 1000000

training_pairs = [
    tensorsFromPair(random.choice(pairs)) for i in range(n_iters)
]

print_every = 100
save_every = 1000
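tensorsFromPair is assumed above. A sketch following the standard PyTorch
seq2seq translation tutorial, using the EOS_token, device, input_lang and
output_lang globals already visible in this example:

def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]

def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(pair):
    # Encode one (source, target) pair as (seq_len, 1) index tensors.
    return (tensorFromSentence(input_lang, pair[0]),
            tensorFromSentence(output_lang, pair[1]))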
Example #11
File: chatbot.py Project: sdq/chatbot
# Configure training/optimization
clip = 50.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iteration = 4000
print_every = 1
save_every = 500

# Ensure dropout layers are in train mode
encoder.train()
decoder.train()

# Initialize optimizers
print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

# If CUDA is available, move the optimizer state tensors onto the GPU
for state in encoder_optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.cuda()

for state in decoder_optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.cuda()
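The two loops above are often factored into a small helper; a sketch:

import torch

def optimizer_state_to_cuda(optimizer):
    # Move any tensors held in the optimizer state (e.g. Adam moments) to the GPU.
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()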
Example #12
def main():
    corpus_name = "cornell movie-dialogs corpus"
    corpus = os.path.join("data", corpus_name)

    printLines(os.path.join(corpus, "movie_lines.txt"))

    # Define path to new file
    datafile = os.path.join(corpus, "formatted_movie_lines.txt")
    linefile = os.path.join(corpus, "movie_lines.txt")
    conversationfile = os.path.join(corpus, "movie_conversations.txt")

    # Initialize lines dict, conversations list, and field ids
    MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
    MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]

    # Load lines and process conversations
    preprocess = Preprocess(datafile, linefile, conversationfile, MOVIE_LINES_FIELDS, MOVIE_CONVERSATIONS_FIELDS)
    preprocess.loadLines()
    preprocess.loadConversations()
    preprocess.writeCSV()

    # Load/Assemble voc and pairs
    save_dir = os.path.join("data", "save")
    dataset = Dataset(corpus, corpus_name, datafile)
    voc, pairs = dataset.loadPrepareData()
    
    # # Print some pairs to validate
    # print("\npairs:")
    # for pair in pairs[:10]:
    #   print(pair)

    # Trim voc and pairs
    pairs = dataset.trimRareWords(voc, pairs, MIN_COUNT)

    # Example for validation
    small_batch_size = 5
    batches = dataset.batch2TrainData(voc, [random.choice(pairs) for _ in range(small_batch_size)])
    input_variable, lengths, target_variable, mask, max_target_len = batches

    print("input_variable:", input_variable)
    print("lengths:", lengths)
    print("target_variable:", target_variable)
    print("mask:", mask)
    print("max_target_len:", max_target_len)

  

    # Configure models
    model_name = 'cb_model'
    attn_model = 'dot'
    #attn_model = 'general'
    #attn_model = 'concat'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1
    batch_size = 64

    # Set checkpoint to load from; set to None if starting from scratch
    loadFilename = None
    checkpoint_iter = 4000
    #loadFilename = os.path.join(save_dir, model_name, corpus_name,
    #                            '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size),
    #                            '{}_checkpoint.tar'.format(checkpoint_iter))

    if loadFilename:
        # If loading on same machine the model was trained on
        checkpoint = torch.load(loadFilename)
        # If loading a model trained on GPU to CPU
        #checkpoint = torch.load(loadFilename, map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    if loadFilename:
        embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    if loadFilename:
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    print('Models built and ready to go!')

    # Configure training/optimization
    clip = 50.0
    teacher_forcing_ratio = 1.0
    learning_rate = 0.0001
    decoder_learning_ratio = 5.0
    n_iteration = 4000
    print_every = 1
    save_every = 500

    # Ensure dropout layers are in train mode
    encoder.train()
    decoder.train()

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
    if loadFilename:
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    # Run training iterations
    print("Starting Training!")
    model = Model(dataset.batch2TrainData, teacher_forcing_ratio)
    model.trainIters(model_name, voc, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer,
                     embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size,
                     print_every, save_every, clip, corpus_name, loadFilename)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    # Initialize search module
    searcher = GreedySearchDecoder(encoder, decoder)
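GreedySearchDecoder is instantiated above but not defined here. A sketch
following the standard PyTorch chatbot tutorial, assuming device and SOS_token
globals:

import torch
import torch.nn as nn

class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Encode the input, then greedily emit the highest-scoring token each step.
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        for _ in range(max_length):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        return all_tokens, all_scores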
Example #13
def main(args):
    torch.cuda.set_device(6)
    model_path = args.model_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # load vocabulary
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    img_path = args.img_path
    factual_cap_path = args.factual_caption_path
    humorous_cap_path = args.humorous_caption_path

    # build data loaders
    data_loader = get_data_loader(img_path, factual_cap_path, vocab,
                                  args.caption_batch_size)
    styled_data_loader = get_styled_data_loader(humorous_cap_path, vocab,
                                                args.language_batch_size)

    # build models
    emb_dim = args.emb_dim
    hidden_dim = args.hidden_dim
    factored_dim = args.factored_dim
    vocab_size = len(vocab)
    encoder = EncoderRNN(voc_size=vocab_size,
                         emb_size=emb_dim,
                         hidden_size=emb_dim)
    decoder = FactoredLSTM(emb_dim, hidden_dim, factored_dim, vocab_size)

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # loss and optimizer
    criterion = masked_cross_entropy
    cap_params = list(decoder.parameters()) + list(encoder.parameters())
    lang_params = list(decoder.S_hc.parameters()) + list(decoder.S_hf.parameters()) \
                  + list(decoder.S_hi.parameters()) + list(decoder.S_ho.parameters())
    optimizer_cap = torch.optim.Adam(cap_params, lr=args.lr_caption)
    optimizer_lang = torch.optim.Adam(lang_params, lr=args.lr_language)

    # train
    total_cap_step = len(data_loader)
    total_lang_step = len(styled_data_loader)
    epoch_num = args.epoch_num
    for epoch in range(epoch_num):
        # caption
        for i, (messages, m_lengths, targets,
                t_lengths) in enumerate(data_loader):

            messages = to_var(messages.long())
            targets = to_var(targets.long())

            # forward, backward and optimize
            decoder.zero_grad()
            encoder.zero_grad()
            output, features = encoder(messages, list(m_lengths))
            outputs = decoder(targets, features, mode="factual")
            loss = criterion(outputs[:, 1:, :].contiguous(),
                             targets[:, 1:].contiguous(), t_lengths - 1)
            loss.backward()
            optimizer_cap.step()

            # print log
            if i % args.log_step_caption == 0:
                print("Epoch [%d/%d], CAP, Step [%d/%d], Loss: %.4f" %
                      (epoch + 1, epoch_num, i, total_cap_step, loss.data[0]))

        eval_outputs(outputs, vocab)

        # language
        for i, (captions, lengths) in enumerate(styled_data_loader):
            captions = to_var(captions.long())

            # forward, backward and optimize
            decoder.zero_grad()
            outputs = decoder(captions, mode='humorous')
            loss = criterion(outputs, captions[:, 1:].contiguous(),
                             lengths - 1)
            loss.backward()
            optimizer_lang.step()

            # print log
            if i % args.log_step_language == 0:
                print("Epoch [%d/%d], LANG, Step [%d/%d], Loss: %.4f" %
                      (epoch + 1, epoch_num, i, total_lang_step, loss.data[0]))

        # save models
        torch.save(decoder.state_dict(),
                   os.path.join(model_path, 'decoder-%d.pkl' % (epoch + 1, )))

        torch.save(encoder.state_dict(),
                   os.path.join(model_path, 'encoder-%d.pkl' % (epoch + 1, )))
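to_var is assumed above. A sketch of the old-style helper from the pre-0.4
Variable era of PyTorch that matches its use here:

import torch
from torch.autograd import Variable

def to_var(x, volatile=False):
    # Move to GPU when available, then wrap in a Variable (legacy idiom).
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)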
Example #14
encoder_kwargs = {
    'opts': opts,
    'vocab_size': len(vocab),
    'embedding_size': opts.word_embedding_size,
    'hidden_size': opts.rnn_hidden_size,
    'padding_idx': padding_idx,
    'dropout_ratio': opts.rnn_dropout,
    'bidirectional': opts.bidirectional == 1,
    'num_layers': opts.rnn_num_layers
}

# Model setup
torch.set_grad_enabled(False)  # a bare torch.no_grad() call here would be a no-op
model = SelfMonitoring(**policy_model_kwargs).cuda()
encoder = EncoderRNN(**encoder_kwargs).cuda()
params = list(encoder.parameters()) + list(model.parameters())
optimizer = torch.optim.Adam(params, lr=opts.learning_rate)
resume_training(opts, model, encoder, optimizer)
model.eval()
# model.device = torch.device("cpu")
encoder.eval()
# encoder.device = torch.device("cpu")
resnet = models.resnet152(pretrained=True)
resnet.eval()
resnet.cuda()

# Gibson setup
config = parse_config('ped.yaml')

def transform_img(im):
    ''' Prep gibson rgb input for pytorch model '''
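    # (body truncated in the source; a typical implementation would resize to
    # the ResNet input size and apply ImageNet mean/std normalization before
    # returning a CHW float tensor)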
Example #15
# word embedding
embedding = nn.Embedding(VOC.num_words, hp.hidden_size)

encoder = EncoderRNN(hp.hidden_size, embedding, hp.n_layers, hp.dropout)
decoder = LuongAttnDecoderRNN(hp.attn_model, embedding, hp.hidden_size,
                              VOC.num_words, hp.n_layers, hp.dropout)

encoder = encoder.to(device)
decoder = decoder.to(device)
print('Models built and ready to go!')

encoder.train()
decoder.train()

print('Building optimizers ...')
encoder_optimizer = optim.Adam(encoder.parameters(), lr=hp.lr)
decoder_optimizer = optim.Adam(decoder.parameters(),
                               lr=hp.lr * hp.decoder_learning_ratio)
encoder_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    encoder_optimizer, 5)
decoder_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    decoder_optimizer, 5)
if loadFilename:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

print("Starting Training!")
trainIters(hp.model_name, VOC, pairs, encoder, decoder, encoder_optimizer,
           decoder_optimizer, embedding, hp.n_layers, hp.n_layers,
           'savedModels/checkpoint', hp.n_iteration, hp.batch_size,
           hp.print_every, hp.save_every, hp.clip, 'persuade', loadFilename)
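# Note: the CosineAnnealingLR schedulers above only take effect if stepped during
# training (encoder_scheduler.step(); decoder_scheduler.step()), presumably once
# per epoch inside trainIters.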
Example #16
def main():
    input_lang = Lang('data/WORDMAP_en.json')
    output_lang = Lang('data/WORDMAP_zh.json')
    print("input_lang.n_words: " + str(input_lang.n_words))
    print("output_lang.n_words: " + str(output_lang.n_words))

    train_data = TranslationDataset('train')
    val_data = TranslationDataset('valid')

    # Initialize encoder & decoder models
    encoder = EncoderRNN(input_lang.n_words, hidden_size, encoder_n_layers,
                         dropout)
    decoder = LuongAttnDecoderRNN(attn_model, hidden_size, output_lang.n_words,
                                  decoder_n_layers, dropout)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # Initializations
    print('Initializing ...')
    train_batch_time = ExpoAverageMeter()  # forward prop. + back prop. time
    train_losses = ExpoAverageMeter()  # loss (per word decoded)
    val_batch_time = ExpoAverageMeter()
    val_losses = ExpoAverageMeter()

    best_loss = 100000
    epochs_since_improvement = 0

    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()

        start = time.time()

        # Batches
        for i_batch in range(len(train_data)):
            input_variable, lengths, target_variable, mask, max_target_len = train_data[
                i_batch]
            train_loss = train(input_variable, lengths, target_variable, mask,
                               max_target_len, encoder, decoder,
                               encoder_optimizer, decoder_optimizer)

            # Keep track of metrics
            train_losses.update(train_loss)
            train_batch_time.update(time.time() - start)

            start = time.time()

            # Print status
            if i_batch % print_every == 0:
                print(
                    '[{0}] Epoch: [{1}][{2}/{3}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        timestamp(),
                        epoch,
                        i_batch,
                        len(train_data),
                        batch_time=train_batch_time,
                        loss=train_losses))

        # One epoch's validation
        start = time.time()

        # Batches
        for i_batch in range(len(val_data)):
            input_variable, lengths, target_variable, mask, max_target_len = val_data[
                i_batch]
            val_loss = valid(input_variable, lengths, target_variable, mask,
                             max_target_len, encoder, decoder)

            # Keep track of metrics
            val_losses.update(val_loss)
            val_batch_time.update(time.time() - start)

            start = time.time()

            # Print status
            if i_batch % print_every == 0:
                print(
                    'Validation: [{0}/{1}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        i_batch,
                        len(val_data),
                        batch_time=val_batch_time,
                        loss=val_losses))

        val_loss = val_losses.avg
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))

        # Check if there was an improvement
        is_best = val_loss < best_loss
        best_loss = min(best_loss, val_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        save_checkpoint(epoch, encoder, decoder, encoder_optimizer,
                        decoder_optimizer, input_lang, output_lang, val_loss,
                        is_best)

        # Initialize search module
        searcher = GreedySearchDecoder(encoder, decoder)
        for input_sentence, target_sentence in pick_n_valid_sentences(
                input_lang, output_lang, 10):
            decoded_words = evaluate(searcher, input_sentence, input_lang,
                                     output_lang)
            print('> {}'.format(input_sentence))
            print('= {}'.format(target_sentence))
            print('< {}'.format(''.join(decoded_words)))

        # Reshuffle train and valid samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
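ExpoAverageMeter is used above without being shown. A sketch consistent with
its .val/.avg/.update() usage, assuming an exponential moving average (the
smoothing factor is a guess):

class ExpoAverageMeter(object):
    def __init__(self, beta=0.9):
        self.beta = beta
        self.val = 0
        self.avg = 0

    def update(self, val):
        # Keep the latest value and an exponentially weighted running average.
        self.val = val
        self.avg = self.beta * self.avg + (1 - self.beta) * val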
Example #17
def train(x,
          y,
          optimizer=optim.Adam,
          criterion=nn.MSELoss(),
          n_steps=100,
          attn_model="general",
          hidden_size=128,
          n_layers=1,
          dropout=0,
          batch_size=50,
          elr=0.001,
          dlr=0.005,
          clip=50.0,
          print_every=10,
          teacher_forcing_ratio=lambda x: 1 if x < 10 else 0):
    # Configure training/optimization
    encoder_learning_rate = elr
    decoder_learning_rate = dlr

    # Initialize models
    encoder = EncoderRNN(1, hidden_size, n_layers, dropout=dropout)
    decoder = LuongAttnDecoderRNN(attn_model,
                                  1,
                                  hidden_size,
                                  n_layers,
                                  dropout=dropout)

    # Initialize optimizers and criterion
    encoder_optimizer = optimizer(encoder.parameters(),
                                  lr=encoder_learning_rate)
    decoder_optimizer = optimizer(decoder.parameters(),
                                  lr=decoder_learning_rate)

    # Move models to GPU
    if USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    # Begin!
    print_loss_total = 0
    step = 0
    while step < n_steps:
        step += 1
        # Get training data for this cycle
        batch_idx = np.random.randint(0, x.shape[1], batch_size)
        input_batches, target_batches = x[:, batch_idx], y[:, batch_idx]

        # Run the train function
        loss, _ = _train(input_batches,
                         target_batches,
                         encoder,
                         decoder,
                         encoder_optimizer,
                         decoder_optimizer,
                         criterion,
                         teacher_forcing_ratio=teacher_forcing_ratio(step),
                         clip=clip)
        # print(np.mean(np.square((output.data.cpu().numpy() - series[-20:,  batch_idx]))))
        # Keep track of loss
        print_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '(%d %d%%) %.4f' % (step, step / n_steps * 100,
                                                print_loss_avg)
            print(print_summary)
    return encoder, decoder
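A usage sketch for this train() wrapper on toy data, assuming the
(seq_len, n_series) layout implied by x[:, batch_idx] and a _train step that
accepts such batches:

import numpy as np

# 200 noisy sine series of length 40; learn to predict the last 10 steps from the first 30.
series = np.sin(np.linspace(0, 12, 40))[:, None] + 0.1 * np.random.randn(40, 200)
x, y = series[:30], series[30:]
encoder, decoder = train(x, y, n_steps=50, batch_size=16, print_every=10)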