Example #1
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng',
                                                 'fra',
                                                 True,
                                                 dir='data',
                                                 filter=False)
    hidden_size = 512
    batch_size = 64
    iters = 50000
    # encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    encoder = EncoderRNN(input_lang.n_words, hidden_size)
    attn_decoder = AttnDecoderRNN(hidden_size,
                                  output_lang.n_words,
                                  dropout_p=0.1)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        encoder = nn.DataParallel(encoder)
        attn_decoder = nn.DataParallel(attn_decoder)
    encoder = encoder.to(device)
    attn_decoder = attn_decoder.to(device)

    # attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, dropout_p=0.1).to(device)
    trainIters(device,
               pairs,
               input_lang,
               output_lang,
               encoder,
               attn_decoder,
               batch_size,
               iters,
               print_every=250)
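Most of these examples assume EncoderRNN / AttnDecoderRNN classes in the style of the official PyTorch seq2seq translation tutorial; the exact constructor signatures vary from example to example. As a hedged reference only, a minimal sketch of that pair of classes could look like this (MAX_LENGTH is an assumed module-level constant):

import torch
import torch.nn as nn
import torch.nn.functional as F

MAX_LENGTH = 10  # assumption: the tutorial's default maximum sentence length


class EncoderRNN(nn.Module):
    # GRU encoder over word indices, as in the PyTorch translation tutorial.
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size)


class AttnDecoderRNN(nn.Module):
    # GRU decoder with attention over the encoder outputs.
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = nn.Linear(hidden_size * 2, max_length)
        self.attn_combine = nn.Linear(hidden_size * 2, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.dropout(self.embedding(input).view(1, 1, -1))
        # attention weights over the (padded) encoder output positions
        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights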
Example #2
def main():
    # Load vocabulary wrapper.
    with open(vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    encoder = EncoderCNN(4096, embed_dim)
    encoder.load_state_dict(torch.load('searchimage.pkl'))
    for p in encoder.parameters():
        p.requires_grad = False

    word_encoder = EncoderRNN(embed_dim, embed_dim, len(vocab), num_layers_rnn)
    word_encoder.load_state_dict(torch.load('searchword.pkl'))
    if torch.cuda.is_available():
        encoder.cuda()
        word_encoder.cuda()
    # Loss and Optimizer
    criterion = nn.MSELoss()
    params = list(
        word_encoder.parameters())  # + list(encoder.linear.parameters())
    optimizer = torch.optim.Adam(params, lr=2e-6, weight_decay=0.001)

    #load data
    with open(image_data_file, 'rb') as f:
        image_data = pickle.load(f)
    image_features = si.loadmat(image_feature_file)

    img_features = image_features['fc7'][0]
    img_features = np.concatenate(img_features)

    print('here')
    iteration = 0

    for i in range(10):  # epoch
        use_caption = i % 5
        print('Epoch', i)
        losses = []
        for x, y in make_mini_batch(img_features,
                                    image_data,
                                    use_caption=use_caption):
            encoder.zero_grad()
            word_encoder.zero_grad()

            word_padding, lengths = make_word_padding(y, vocab)
            x = Variable(torch.from_numpy(x).cuda())
            word_index = Variable(torch.from_numpy(word_padding).cuda())

            features = encoder(x)
            outputs = word_encoder(word_index, lengths)
            loss = torch.mean((features - outputs).pow(2))
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
            if iteration % 100 == 0:
                print('loss', sum(losses) / float(len(losses)))
                losses = []

            iteration += 1

        torch.save(word_encoder.state_dict(), 'searchword.pkl')
        torch.save(encoder.state_dict(), 'searchimage.pkl')
Example #3
def train(args, data, bidaf):
    device = torch.device(
        f"cuda:{args.gpu}" if torch.cuda.is_available() else "cpu")
    utte_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    span_encoder = EncoderRNN(args, data.WORD.vocab.vectors).to(device)
    decoder = AttnDecoderRNN(args, data.WORD.vocab.vectors).to(device)

    utte_encoder_optimizer = optim.SGD(utte_encoder.parameters(),
                                       lr=args.learning_rate)
    span_encoder_optimizer = optim.SGD(span_encoder.parameters(),
                                       lr=args.learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.learning_rate)
    criterion = nn.NLLLoss()

    n_iters = 10 * len(data.train.examples)
    print_loss_total = 0
    print_every = 10000
    start = time.time()
    for iter in range(1, n_iters + 1):
        # cycle through the training examples (10 passes in total)
        example = data.train.examples[(iter - 1) % len(data.train.examples)]
        input_tensor = example.q_word
        target_tensor = example.ans
        span = example.span
        loss = train_each(input_tensor, target_tensor, utte_encoder,
                          span_encoder, decoder, utte_encoder_optimizer,
                          span_encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if iter % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' %
                  (timeSince(start, iter / n_iters), iter,
                   iter / n_iters * 100, print_loss_avg))
Example #4
 def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional,
              embedding_size, n_parameter, m_parameter, learning_rate, clip,
              alpha, beta, pre_trained_file=None):
     self.batch_size = batch_size
     self.hidden_size = hidden_size
     self.embedding_size = embedding_size
     self.bidirectional = bidirectional
     self.n_parameter = n_parameter
     self.m_parameter = m_parameter
     self.learning_rate = learning_rate
     self.wm = wm
     self.clip = clip
     self.alpha = alpha
     self.beta = beta
     if pre_trained_file is None:
         self.encoder = EncoderRNN(self.wm, self.embedding_size,\
             hidden_size, bidirectional)
         self.decoder = AttnDecoderRNN(self.hidden_size, 10)
         self.enc_optimizer = optim.Adam(self.encoder.parameters(),\
             lr=self.learning_rate)
         self.dec_optimizer = optim.Adam(self.decoder.parameters(),\
             lr=self.learning_rate)
         self.start = 0
     else:
         self.resume_training = True
         self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer,\
             self.start = self.load_model_state(pre_trained_file)
     self.decoder = self.decoder.to(device)
     self.encoder = self.encoder.to(device)
Example #5
def example_test():
    encoder_test = EncoderRNN(10, 10, 2, max_length=3)
    decoder_test = AttnDecoderRNN('general', 10, 10, 2)
    print(encoder_test)
    print(decoder_test)

    encoder_hidden = encoder_test.init_hidden(batch_size=4)
    # word_input = Variable(torch.LongTensor([[1, 2, 3]]))
    word_input = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    if USE_CUDA:
        encoder_test.cuda()
        word_input = word_input.cuda()
        encoder_hidden = encoder_hidden.cuda()
    encoder_outputs, encoder_hidden = encoder_test(
        word_input, encoder_hidden)  # S B H, L B H
    print(encoder_outputs.shape, encoder_hidden.shape)
    # word_inputs = Variable(torch.LongTensor([[1, 2, 3]]))
    word_inputs = Variable(torch.LongTensor(
        [[1, 2, 3], [4, 5, 6], [1, 2, 3], [4, 5, 6]]))
    decoder_attns = torch.zeros(4, 3, 3)
    decoder_hidden = encoder_hidden
    decoder_context = Variable(torch.zeros(4, decoder_test.hidden_size))

    if USE_CUDA:
        decoder_test.cuda()
        word_inputs = word_inputs.cuda()
        decoder_context = decoder_context.cuda()

    for i in range(3):
        decoder_output, decoder_context, decoder_hidden, decoder_attn = decoder_test(
            word_inputs[:, i], decoder_context, decoder_hidden, encoder_outputs)
        print(decoder_output.size(), decoder_hidden.size(), decoder_attn.size())
        decoder_attns[:, i, :] = decoder_attn.squeeze(1).cpu().data
Example #6
def load_model():
    encoder = EncoderRNN(human_n_chars, hidden_size, n_layers)
    decoder = AttnDecoderRNN(attn_model,
                             hidden_size,
                             machine_n_chars,
                             n_layers,
                             dropout_p=dropout_p)
    encoder.load_state_dict(t.load('encoder.pth'))
    decoder.load_state_dict(t.load('decoder.pth'))
    return encoder, decoder
Example #7
def epoch_training(train_iter,
                   val_iter,
                   num_epoch=100,
                   learning_rate=1e-4,
                   hidden_size=100,
                   early_stop=False,
                   patience=2,
                   epsilon=1e-4):
    # define model
    encoder = EncoderRNN(input_size=len(EN.vocab), hidden_size=hidden_size)
    decoder = DecoderRNN(hidden_size=hidden_size, output_size=len(DE.vocab))

    # define loss criterion
    criterion = nn.NLLLoss(ignore_index=PAD_token)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    losses = np.ndarray(patience)

    res_loss = 13
    res_encoder = None
    res_decoder = None
    res_epoch = 0
    base_bleu = 0
    not_updated = 0

    for epoch in range(num_epoch):
        tl = train(train_iter, encoder, decoder, encoder_optimizer,
                   decoder_optimizer, criterion)
        loss, val_bleu = evaluate(val_iter, encoder, decoder, criterion)
        logging.warning('******Epoch: ' + str(epoch) + ' Training Loss: ' +
                        str(tl) + ' Validation Loss: ' + str(loss) +
                        ' Validation Bleu: ' + str(val_bleu) + '*********')
        # save the model with the highest validation BLEU
        if base_bleu <= val_bleu:
            base_bleu = val_bleu
            res_loss = loss
            res_encoder = encoder
            res_decoder = decoder
            res_epoch = epoch
            not_updated = 0
            logging.warning('Updated validation loss as ' + str(res_loss) +
                            ' with validation Bleu as ' + str(base_bleu) +
                            ' at epoch ' + str(res_epoch))
        else:
            not_updated += 1
        if not_updated == patience:
            break
    print('Stop at Epoch: ' + str(res_epoch) + ", With Validation Loss: " +
          str(res_loss) + ", Validation Bleu: " + str(base_bleu))
    logging.warning('Stop at Epoch: ' + str(res_epoch) +
                    ", With Validation Loss: " + str(res_loss) +
                    ", Validation Bleu: " + str(base_bleu))
    return res_loss, res_encoder, res_decoder, base_bleu
Example #8
def load_model_from_file(voc, file):
    # load the model from a local checkpoint file
    checkpoint = torch.load(file)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    # build the word embedding layer
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    return encoder, decoder
Example #9
def main():
    # data
    input_lang, output_lang, pairs = prepare_data('eng', 'fra', reverse=True)

    encoder = EncoderRNN(input_lang.n_words, hidden_size, use_cuda)
    if use_attention:
        decoder = AttentionDecoderRNN(hidden_size, output_lang.n_words,
                                      use_cuda)
    else:
        decoder = DecoderRNN(hidden_size, output_lang.n_words, use_cuda)

    if use_cuda:
        encoder, decoder = encoder.cuda(), decoder.cuda()

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

    plot_losses = []
    print_total_loss = 0.
    plot_total_loss = 0.

    criterion = nn.CrossEntropyLoss()
    encoder_schedule = MultiStepLR(encoder_optimizer, [40000, 60000])
    decoder_schedule = MultiStepLR(decoder_optimizer, [40000, 60000])

    for iter in tqdm(range(1, num_iters + 1)):
        input_variable, output_variable = variable_from_pair(
            input_lang, output_lang, random.choice(pairs), use_cuda)
        loss = train(input_variable, output_variable, encoder, decoder,
                     encoder_optimizer, decoder_optimizer, criterion)
        # step the LR schedulers after the optimizer updates (PyTorch >= 1.1 ordering)
        encoder_schedule.step()
        decoder_schedule.step()
        print_total_loss += loss
        plot_total_loss += loss

        if iter % print_every == 0:
            print_avg_loss = print_total_loss / print_every
            print_total_loss = 0
            tqdm.write("iter: {} Percent: {}% Loss: {}".format(
                iter, round(100 * iter / num_iters, 2), print_avg_loss))

        if iter % plot_every == 0:
            plot_avg_loss = plot_total_loss / plot_every
            plot_losses.append(plot_avg_loss)
            plot_total_loss = 0

    show_plot(plot_losses)
    evaluate_randomly(encoder, decoder, input_lang, output_lang, pairs)
Example #10
def train(**kwargs):
    opt = Config()
    for k, v in kwargs.items():  # set config attributes from kwargs
        setattr(opt, k, v)
    if opt.use_gpu:
        torch.cuda.empty_cache()  # clear the CUDA cache
    # data
    dataloader, datas = get_loader(opt)
    datas = dataloader.dataset.datas
    word2ix = datas['word2ix']
    sos = word2ix.get(datas.get('sos'))
    voc_length = len(word2ix)
    # define the models
    encoder = EncoderRNN(opt, voc_length)
    decoder = AttentionDecoderRNN(opt, voc_length)
    # load a checkpoint and resume from where the last run stopped
    if opt.model_ckpt:
        checkpoint = torch.load(opt.model_ckpt)
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])
    # move to the device and switch to training mode
    encoder = encoder.to(opt.device)
    decoder = decoder.to(opt.device)
    encoder.train()
    decoder.train()
    # define the optimizers (only after encoder.to(device); do not swap the order)
    encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=opt.learning_rate)
    decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=opt.learning_rate * opt.decoder_learning_ratio)
    if opt.model_ckpt:
        encoder_optimizer.load_state_dict(checkpoint['en_opt'])
        decoder_optimizer.load_state_dict(checkpoint['de_opt'])
    # running total used for printing the loss
    print_loss = 0
    for epoch in range(opt.epoch):
        for i, data in enumerate(dataloader):
            # train on one batch
            loss = train_by_batch(sos, opt, data, encoder_optimizer, decoder_optimizer, encoder, decoder)
            print_loss += loss
            # print the loss
            if i % opt.print_every == 0:
                print_loss_avg = print_loss / opt.print_every
                print("Epoch: {}; Epoch Percent complete: {:.4f}%; Average loss: {:.8f}"
                .format(epoch, epoch / opt.epoch * 100, print_loss_avg))
                print_loss = 0                
        # save a checkpoint
        if epoch % opt.save_every == 0:
            checkpoint_path = '{prefix}_{time}'.format(prefix=opt.prefix,time=time.strftime('%m%d_%H%M'))
            torch.save({
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
            }, checkpoint_path)
Example #11
def main():
    ### load word embedding
    pickle_file = open(embedding_path, "rb")
    word_embedding = pickle.load(pickle_file)
    pickle_file.close()

    word_index = word_embedding[0]
    embedding_map = word_embedding[1]
    output_size = len(word_index)

    ### initialize model
    hidden_size = 100
    encoder = EncoderRNN(hidden_size)
    decoder = DecoderRNN(hidden_size, output_size)

    ### load train data
    parser = AcademicParser("../train_data/Academic_papers/docs.json")
    abstracts = parser.get_paperAbstract()
    titles = parser.get_title()
    assert (len(abstracts) == len(titles))

    ### prepare train data
    train_set = []
    for i in range(len(abstracts)):
        abstract = abstracts[i]
        title = titles[i]
        new_pair = variablesFromPair((abstract, title), word_index,
                                     embedding_map)
        if (len(new_pair[1]) > 0):
            train_set.append(new_pair)

    trainIters(encoder, decoder, 20000, train_set)
Example #12
 def test_input_dropout_WITH_PROB_ZERO(self):
     rnn = EncoderRNN(self.vocab_size, None, 50, 16, input_dropout_p=0, n_layers=3,\
                      bidirectional=True, rnn_cell_name='lstm')
     print(rnn)
     for param in rnn.parameters():
         param.data.uniform_(-1, 1)
     output1, _ = rnn(self.input_var, self.lengths)
     if isinstance(_, tuple):
         # print('outputs', [elem.size() for elem in output1])
         print('outputs', output1.size())
         print('hidden', [elem.size() for elem in _])
     else:
         print('outputs', output1.size())
         print('hidden', _.size())
     output2, _ = rnn(self.input_var, self.lengths)
     self.assertTrue(torch.equal(output1.data, output2.data))
Example #13
def get_encoder_decoder(vocab):
    """ Given the arguments, returns the correct combination of CNN/RNN/GAN encoders and decoders. """
    if args.pretrain_rnn:
        encoder = EncoderRNN(len(vocab),
                             args.embed_size,
                             args.encoder_rnn_hidden_size,
                             num_layers=args.num_layers).to(device)
    elif args.gan_embedding:
        gan = torch.load('DCGAN_embed_2.tch').to(device)
        encoder = gan.discriminator
    elif args.progan_embedding:
        pro_gan = pg.ProGAN(depth=7,
                            latent_size=256,
                            device=torch.device('cuda'))
        pro_gan.dis.load_state_dict(torch.load('progan_weights/GAN_DIS_6.pth'))
        # pro_gan.dis_optim.load_state_dict(torch.load('progan_weights/GAN_DIS_OPTIM_6.pth'))
        pro_gan.gen.load_state_dict(torch.load('progan_weights/GAN_GEN_6.pth'))
        # pro_gan.gen_optim.load_state_dict(torch.load('progan_weights/GAN_GEN_OPTIM_6.pth'))
        pro_gan.gen_shadow.load_state_dict(
            torch.load('progan_weights/GAN_GEN_SHADOW_6.pth'))
        print("Loaded proGAN weights.", flush=True)
        encoder = pro_gan.dis.to(device)
    else:
        encoder = EncoderCNN(args.embed_size).to(device)

    decoder = DecoderRNNOld(args.embed_size,
                            args.decoder_rnn_hidden_size,
                            len(vocab),
                            args.num_layers,
                            vocab,
                            device=device).to(device)
    return encoder, decoder
Example #14
 def load_model_state(self, model_file):
     print("Resuming training from a given model...")
     model = torch.load(model_file, map_location=lambda storage, loc: storage)
     epoch = model['epoch']
     encoder_state_dict = model['encoder_state_dict']
     encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
     decoder_state_dict = model['decoder_state_dict']
     decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
     loss = model['loss']
     encoder = EncoderRNN(self.wm, self.embedding_size,\
         self.hidden_size, self.bidirectional)
     decoder = AttnDecoderRNN("general", self.hidden_size, 10)
     encoder.load_state_dict(encoder_state_dict)
     decoder.load_state_dict(decoder_state_dict)
     enc_optimizer = optim.Adam(encoder.parameters(), lr=self.learning_rate)
     dec_optimizer = optim.Adam(decoder.parameters(), lr=self.learning_rate)
     enc_optimizer.load_state_dict(encoder_optimizer_state_dict)
     dec_optimizer.load_state_dict(decoder_optimizer_state_dict)

     return encoder, decoder, enc_optimizer, dec_optimizer, epoch
Example #15
def test(opt):

    # data
    dataloader = get_dataloader(opt)
    _data = dataloader.dataset._data
    word2ix, ix2word = _data['word2ix'], _data['ix2word']
    sos = word2ix.get(_data.get('sos'))
    eos = word2ix.get(_data.get('eos'))
    unknown = word2ix.get(_data.get('unknown'))
    voc_length = len(word2ix)

    # define the models
    encoder = EncoderRNN(opt, voc_length)
    decoder = LuongAttnDecoderRNN(opt, voc_length)

    # load the model checkpoint
    if opt.model_ckpt is None:
        raise ValueError('model_ckpt is None.')
    checkpoint = torch.load(opt.model_ckpt, map_location=lambda s, l: s)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    with torch.no_grad():
        # move to the device and switch to eval mode
        encoder = encoder.to(opt.device)
        decoder = decoder.to(opt.device)
        encoder.eval()
        decoder.eval()
        # define the searcher
        searcher = GreedySearchDecoder(encoder, decoder)
        return searcher, sos, eos, unknown, word2ix, ix2word
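The GreedySearchDecoder used here and in Example #26 is not part of the snippet. A minimal sketch in the style of the PyTorch chatbot tutorial, assuming module-level device and SOS_token values and encoder/decoder modules with that tutorial's interfaces, might be:

class GreedySearchDecoder(nn.Module):
    # Greedy decoding: at each step feed the most likely token back into the decoder.
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        # Encode the whole input sentence once.
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        # Use the encoder's final hidden state as the decoder's initial state.
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        # Start decoding from the start-of-sentence token.
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)
        for _ in range(max_length):
            decoder_output, decoder_hidden = self.decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            # Greedily pick the most likely token and record its score.
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        return all_tokens, all_scores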
Example #16
def runTest(n_layers, hidden_size, reverse, modelFile, beam_size, inp, corpus):
    torch.set_grad_enabled(False)

    voc, pairs = loadPrepareData(corpus)
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, n_layers)

    checkpoint = torch.load(modelFile,
                            map_location=lambda storage, loc: storage)
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    # train mode set to false, effect only on dropout, batchNorm
    encoder.train(False)
    decoder.train(False)

    encoder = encoder.to(device)
    decoder = decoder.to(device)

    if inp:
        evaluateInput(encoder, decoder, voc, beam_size)
    else:
        evaluateRandomly(encoder, decoder, voc, pairs, reverse, beam_size, 20)
Example #17
 def __init__(self, config, dataset):
     self.config = config
     self.n_epochs = config.n_epochs
     self.encoder = EncoderRNN(n_dict=dataset.source.n_words, config=config)
     self.decoder = AttnDecoderRNN(n_dict=dataset.target.n_words,
                                   config=config)
     self.encoder_optimizer = config.optimizier(self.encoder.parameters(),
                                                lr=config.learning_rate)
     self.decoder_optimizer = config.optimizier(self.decoder.parameters(),
                                                lr=config.learning_rate)
     self.criterion = nn.NLLLoss()
     self.is_plot = config.is_plot
     self.clip_value = config.clip_value
     self.losses = []
     if self.config.USE_CUDA:
         self.encoder.cuda(self.config.gpu_id)
         self.decoder.cuda(self.config.gpu_id)
Example #18
def main():
    input_file = sys.argv[1]
    vocab = build.build_vocabulary(input_file)
    pairs = [tensors_from_pair(vocab, x.split("\t")) for x in open(input_file)]
    pairs = [(x,y) for x, y in pairs if x.size(0) <= MAX_LENGTH]

    hidden_size = 256
    encoder1 = EncoderRNN(vocab.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, vocab.n_words, dropout_p=0.1).to(device)
    train_iter(pairs, encoder1, attn_decoder1, 75000, print_every=100)
Example #19
    def setUpClass(cls):
        cls.pre_processing = PreProcessing(sentences)
        cls.dataset = ds.process(cls.pre_processing)
        cls.word_embedding = WordEmbedding(source=cls.dataset.pairs)

        encoder = EncoderRNN(cls.word_embedding, 300, 1).to(settings.device)
        decoder = DecoderRNN(300, cls.word_embedding, 0.0,
                             1).to(settings.device)
        cls.model = Model(encoder, decoder)
        cls.model.train(cls.dataset)
Example #20
def create_models(config, in_words, out_words):
    logging.info('Creating models...')
    encoder = EncoderRNN(in_words,
                         int(config['hidden_size']),
                         num_layers=int(config['num_layers'])).cuda()

    decoder = AttnDecoderRNN(int(config['hidden_size']),
                             out_words,
                             num_layers=int(config['num_layers']),
                             dropout_p=float(config['dropout_p'])).cuda()
    return encoder, decoder
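A possible way to call create_models, assuming CUDA is available (the function calls .cuda()) and a config mapping with the three keys the function reads; the concrete values are illustrative only:

# hedged usage sketch; config keys inferred from the function body above
config = {'hidden_size': 256, 'num_layers': 2, 'dropout_p': 0.1}
encoder, decoder = create_models(config, in_words=7000, out_words=9000)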
Example #21
def inference(sentence, language, MODEL_DIR, codersum):
    encoder = EncoderRNN(language.n_words,
                         config.HIDDEN_SIZE,
                         config.NUM_LAYER,
                         max_length=config.MAX_LENGTH + 1)
    decoder = AttnDecoderRNN(config.ATT_MODEL,
                             config.HIDDEN_SIZE,
                             language.n_words,
                             config.NUM_LAYER,
                             dropout_p=config.DROPOUT)

    encoder_path = os.path.join(MODEL_DIR, "encoder_" + str(codersum) + ".pth")
    decoder_path = os.path.join(MODEL_DIR, "decoder_" + str(codersum) + ".pth")
    encoder.load_state_dict(torch.load(encoder_path, map_location="cpu"))
    decoder.load_state_dict(torch.load(decoder_path, map_location="cpu"))
    encoder.eval()
    decoder.eval()
    batch_size = 1

    input_index = indexes_from_sentence(language, sentence)
    input_index = pad_sentence(input_index)  # pad the input
    input_variable = torch.LongTensor([input_index])
    encoder_hidden, encoder_cell = encoder.init_hidden(batch_size)
    encoder_outputs, encoder_hidden, encoder_cell = encoder(
        input_variable, encoder_hidden, encoder_cell)

    decoder_input = torch.zeros(batch_size, 1).long()
    decoder_context = torch.zeros(batch_size, decoder.hidden_size)
    decoder_hidden = encoder_hidden
    decoder_cell = encoder_cell
    if config.USE_CUDA:
        decoder_input = decoder_input.cuda()
        decoder_context = decoder_context.cuda()

    decoded_words = []

    # Run through decoder
    for di in range(config.MAX_LENGTH):
        decoder_output, decoder_context, decoder_hidden, decoder_cell, _ = decoder(
            decoder_input, decoder_context, decoder_hidden, decoder_cell,
            encoder_outputs)

        # Choose top word from output
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        if ni == 0:
            break
        else:
            decoded_words.append(language.index2word[ni.item()])

        decoder_input = torch.LongTensor([[ni]])
        if config.USE_CUDA:
            decoder_input = decoder_input.cuda()

    return "".join(decoded_words)
Example #22
def main():
    input_lang, output_lang, pairs = prepare_data('ques',
                                                  'ans',
                                                  '../debug.json',
                                                  reverse=False)
    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size,
                                  output_lang.n_words,
                                  dropout_p=0.1,
                                  max_length=1000).to(device)

    rate = 0.9
    split_idx = int(len(pairs) * rate)
    pairs_train, pairs_test = pairs[:split_idx], pairs[split_idx:]
    encoder.load_state_dict(torch.load('model/encoder-0.model'))
    encoder.eval()
    attn_decoder.load_state_dict(torch.load('model/decoder-0.model'))
    attn_decoder.eval()
    evaluate_all(encoder,
                 attn_decoder,
                 pairs_test,
                 max_length=1000,
                 input_lang=input_lang,
                 output_lang=output_lang,
                 n=len(pairs_test))
    # show_plot(loss_history)
    print('done test')
Example #23
def main():
    args = parse_arguments()
    hidden_size = 300
    embed_size = 50
    kld_weight = 0.05
    temperature = 0.9
    use_cuda = torch.cuda.is_available()

    print("[!] preparing dataset...")
    TEXT = data.Field(lower=True, fix_length=30)
    LABEL = data.Field(sequential=False)
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train_data, max_size=250000)
    LABEL.build_vocab(train_data)
    train_iter, test_iter = data.BucketIterator.splits(
        (train_data, test_data), batch_size=args.batch_size, repeat=False)
    vocab_size = len(TEXT.vocab) + 2

    print("[!] Instantiating models...")
    encoder = EncoderRNN(vocab_size,
                         hidden_size,
                         embed_size,
                         n_layers=2,
                         dropout=0.5,
                         use_cuda=use_cuda)
    decoder = DecoderRNN(embed_size,
                         hidden_size,
                         vocab_size,
                         n_layers=2,
                         dropout=0.5,
                         use_cuda=use_cuda)
    vae = VAE(encoder, decoder)
    optimizer = optim.Adam(vae.parameters(), lr=args.lr)
    if use_cuda:
        print("[!] Using CUDA...")
        vae.cuda()

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, vae, optimizer, train_iter, vocab_size, kld_weight,
              temperature, args.grad_clip, use_cuda, TEXT)
        val_loss = evaluate(vae, test_iter, vocab_size, kld_weight, use_cuda)
        print("[Epoch: %d] val_loss:%5.3f | val_pp:%5.2fS" %
              (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir("snapshot"):
                os.makedirs("snapshot")
            torch.save(vae.state_dict(), './snapshot/vae_{}.pt'.format(e))
            best_val_loss = val_loss
Example #24
 def __init__(self, wm, input_length, batch_size, hidden_size, bidirectional, 
                 embedding_size, n_parameter, m_parameter, learning_rate, clip, 
                 alpha, beta, pre_trained_file = None, decoder_type="original", teacher_forcing_ratio=0.7):
     self.batch_size = batch_size
     self.hidden_size = hidden_size
     self.embedding_size = embedding_size
     self.bidirectional = bidirectional
     self.n_parameter = n_parameter
     self.m_parameter = m_parameter
     self.learning_rate = learning_rate
     self.wm = wm
     self.clip = clip
     self.alpha = alpha
     self.beta = beta
     self.loss_list = []
     self.teacher_forcing_ratio = teacher_forcing_ratio
     self.decoder_type = decoder_type
     
     if pre_trained_file is None:
         # define encoder and decoder
         self.encoder = EncoderRNN(self.wm, self.embedding_size, hidden_size, bidirectional, n_layers=1)
         
         # select decoder type
         if self.decoder_type == "original":
             self.decoder = AttnDecoderRNN("general", self.hidden_size, 10)
         elif self.decoder_type == "bahdanau":
             self.decoder = BahdanauAttnDecoderRNN(self.embedding_size, hidden_size, 10, discrete_representation=True)
         
         # define optimizer of encoder and decoder
         self.enc_optimizer = optim.Adam(self.encoder.parameters(), lr=self.learning_rate)
         self.dec_optimizer = optim.Adam(self.decoder.parameters(), lr=self.learning_rate)
         self.start = 1
     else:
         self.resume_training = True
         self.encoder, self.decoder, self.enc_optimizer, self.dec_optimizer,\
             self.start = self.load_model_state(pre_trained_file)      
     self.decoder = self.decoder.to(device)
     self.encoder = self.encoder.to(device)
Example #25
def trainIters(n_iteration,
               learning_rate,
               batch_size,
               n_layers,
               hidden_size,
               attn_model='dot',
               decoder_learning_ratio=5.0):

    voc, pairs = loadPrepareData()

    # sample a fresh batch of pairs for every iteration
    training_batches = [
        batch2TrainData(voc, [random.choice(pairs) for _ in range(batch_size)])
        for _ in range(n_iteration)
    ]

    # model
    checkpoint = None
    print('Building encoder and decoder ...')
    encoder = EncoderRNN(voc, hidden_size, n_layers)
    attn_model = 'dot'
    decoder = LuongAttnDecoderRNN(voc, attn_model, hidden_size, n_layers)

    # optimizer
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=learning_rate * decoder_learning_ratio)

    # initialize
    print('Initializing ...')
    start_iteration = 1
    perplexity = []
    print_loss = 0

    for iteration in tqdm(range(start_iteration, n_iteration + 1)):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_len = training_batch
Example #26
def eval():
    parameter = Config()
    # load parameters from the config
    save_dir = parameter.save_dir
    loadFilename = parameter.model_ckpt

    pretrained_embedding_path = parameter.pretrained_embedding_path
    dropout = parameter.dropout
    hidden_size = parameter.hidden_size
    num_layers = parameter.num_layers
    attn_model = parameter.method

    max_input_length = parameter.max_input_length
    max_generate_length = parameter.max_generate_length
    embedding_dim = parameter.embedding_dim
    # load the embedding
    voc = read_voc_file('./data/voc.pkl')
    embedding = get_weight(voc, pretrained_embedding_path)
    # test inputs
    inputs = get_input_line('./test/test.txt')
    input_batches, lengths = get_batch_id(inputs)
    # build the models
    encoder = EncoderRNN(hidden_size, embedding, num_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, len(voc),
                                  num_layers, dropout)
    if loadFilename is None:
        raise ValueError('model_ckpt is None.')
    checkpoint = torch.load(loadFilename, map_location=lambda s, l: s)
    print(checkpoint['plt'])
    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])
    answer = []
    with torch.no_grad():
        encoder.to(device)
        decoder.to(device)
        # switch to evaluation mode
        encoder.eval()
        decoder.eval()
        search = GreedySearchDecoder(encoder, decoder)
        for input_batch in input_batches:
            #print(input_batch)
            token, score = generate(input_batch, search, GO_ID, EOS_ID, device)
            print(token)
            answer.append(token)
        print(answer)
    return answer
Example #27
def main():
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    print(random.choice(pairs))

    device = torch.device(args.device)
    print('device : {}'.format(device))

    encoder = EncoderRNN(input_lang.n_words, args.hidden_size).to(device)
    decoder = AttnDecoderRNN(args.hidden_size,
                             output_lang.n_words,
                             dropout_p=0.1).to(device)

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=args.lr)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=args.lr)

    model = Translator(input_lang, output_lang, encoder, decoder,
                       encoder_optimizer, decoder_optimizer)

    trainIters(model, pairs, n_iters=10000, print_every=100, plot_every=100)

    evaluateRandomly(model, pairs)

    output_words, attentions = evaluate(model, "je suis trop froid .")
    plt.matshow(attentions.numpy())
Example #28
def main():
    nIters = 100000
    loadFilename = os.path.join('checkpoints',
                                '{}_{}.tar'.format(nIters, 'checkpoint'))
    checkpoint = torch.load(loadFilename)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    # If loading a model trained on GPU to CPU
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    hidden_size = 256
    encoder = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    decoder = AttnDecoderRNN(hidden_size,
                             output_lang.n_words,
                             device,
                             dropout_p=0.1).to(device)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    encoder_optimizer_sd = checkpoint['en_opt']
    decoder_optimizer_sd = checkpoint['de_opt']
    input_lang.__dict__ = checkpoint['input_lang']
    output_lang.__dict__ = checkpoint['output_lang']
    evaluateRandomly(device, pairs, encoder, decoder, input_lang, output_lang)
Example #29
def loadmodel(model_file, wm, hidden_size, bidirectional):
    """
    Loads the trained model, returns the encoder and decoder for inferencing.
    We initialize 'empty models' in which we will load our parameters.
    It is important that the hyperparameters are the same as used for training.

    Keyword arguments:
    model_file - string with the model location
    wm - embedding matrix
    hidden_size - hidden size
    bidirectional - whether we use bidirectional GRU layers
    """
    model = torch.load(model_file, map_location=lambda storage, loc: storage)
    epoch = model['epoch']
    encoder_state_dict = model['encoder_state_dict']
    encoder_optimizer_state_dict = model['encoder_optimizer_state_dict']
    decoder_state_dict = model['decoder_state_dict']
    decoder_optimizer_state_dict = model['decoder_optimizer_state_dict']
    loss = model['loss']
    encoder = EncoderRNN(wm, 300, hidden_size, bidirectional)
    decoder = AttnDecoderRNN(hidden_size, 10)
    encoder.load_state_dict(encoder_state_dict)
    decoder.load_state_dict(decoder_state_dict)
    enc_optimizer = optim.Adam(encoder.parameters(), lr=0.0001)
    dec_optimizer = optim.Adam(decoder.parameters(), lr=0.0001)
    return encoder, decoder
Example #30
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_lang, output_lang, pairs = prepareData('eng', 'fra', True)
    hidden_size = 256
    encoder1 = EncoderRNN(input_lang.n_words, hidden_size, device).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size,
                                   output_lang.n_words,
                                   device,
                                   dropout_p=0.1).to(device)
    trainIters(device,
               pairs,
               input_lang,
               output_lang,
               encoder1,
               attn_decoder1,
               100000,
               print_every=5000)
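For reference, the per-pair train step that trainIters-style loops such as Example #1 and Example #30 usually delegate to (in the translation-tutorial variant) can be sketched as below. This is only an illustrative sketch, not the trainIters used above; device, SOS_token, EOS_token, MAX_LENGTH, random and teacher_forcing_ratio are assumed module-level names.

def train(input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    loss = 0

    # Encode the source sentence one token at a time.
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio
    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        loss += criterion(decoder_output, target_tensor[di])
        if use_teacher_forcing:
            decoder_input = target_tensor[di]  # feed the ground-truth token
        else:
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()  # feed the model's own prediction
            if decoder_input.item() == EOS_token:
                break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length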