def model_load(self):
    """Rebuild the Seq2Seq model from the stored checkpoint and return it in eval mode.

    Reads encoder/decoder constructor kwargs and the trained weights from
    ``self.checkpoint``, moves the model to the module-level ``device``,
    and switches it to inference mode.
    """
    ckpt = self.checkpoint
    enc = Encoder(**ckpt['encoder_parameter'])
    dec = AttentionDecoder(**ckpt['decoder_parameter'])
    seq2seq = Seq2Seq(enc, dec, self.seq_len, self.get_attention)
    seq2seq.load_state_dict(ckpt['model_state_dict'])
    seq2seq.to(device)
    seq2seq.eval()
    return seq2seq
Example #2
0
File: train.py  Project: bosung/cQA
def main(args):
    """Train the encoder/attention-decoder pair on the QA data file.

    Builds a vocabulary from the fixed training file, constructs the models
    (optionally with pre-trained embeddings and/or checkpointed weights),
    then trains for ``args.epoch`` epochs, evaluating every 20 epochs and
    saving the best-scoring models when ``args.save == 'y'``.
    """
    # NOTE(review): writes the module-level batch_size — presumably other
    # helpers in this module read it; confirm before refactoring.
    global batch_size
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    w_embed_size = args.w_embed_size
    lr = args.lr

    train_file = 'data/train_data_nv.txt'

    # Vocabulary is derived from the training file itself.
    vocab = Vocab()
    vocab.build(train_file)

    if args.pre_trained_embed == 'n':
        # Randomly initialized embeddings.
        encoder = Encoder(vocab.n_words, w_embed_size, hidden_size,
                          batch_size).to(device)
        decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size,
                                   batch_size).to(device)
    else:
        # load pre-trained embedding
        weight = vocab.load_weight(path="data/komoran_hd_2times.vec")
        encoder = Encoder(vocab.n_words, w_embed_size, hidden_size, batch_size,
                          weight).to(device)
        decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size,
                                   batch_size, weight).to(device)

    # Optionally warm-start either model from a checkpoint path.
    if args.encoder:
        encoder.load_state_dict(torch.load(args.encoder))
        print("[INFO] load encoder with %s" % args.encoder)
    if args.decoder:
        decoder.load_state_dict(torch.load(args.decoder))
        print("[INFO] load decoder with %s" % args.decoder)

    train_data = prep.read_train_data(train_file)
    train_loader = data.DataLoader(train_data,
                                   batch_size=batch_size,
                                   shuffle=True)

    # ev.evaluateRandomly(encoder, decoder, train_data, vocab, batch_size)
    # ev.evaluate_with_print(encoder, vocab, batch_size)

    # initialize
    # Baseline accuracy of the (possibly warm-started) models before training.
    max_a_at_5, max_a_at_1 = ev.evaluate_similarity(encoder,
                                                    vocab,
                                                    batch_size,
                                                    decoder=decoder)
    # max_a_at_5, max_a_at_1 = 0, 0
    max_bleu = 0

    total_epoch = args.epoch
    print(args)
    for epoch in range(1, total_epoch + 1):
        # NOTE(review): train_data is shuffled in place here even though the
        # DataLoader above was built with shuffle=True — trainIters appears to
        # consume train_data directly; confirm which of the two is used.
        random.shuffle(train_data)
        trainIters(args,
                   epoch,
                   encoder,
                   decoder,
                   total_epoch,
                   train_data,
                   vocab,
                   train_loader,
                   print_every=2,
                   learning_rate=lr)

        # Periodic evaluation: track best accuracy@1/@5 and BLEU.
        if epoch % 20 == 0:
            a_at_5, a_at_1 = ev.evaluate_similarity(encoder,
                                                    vocab,
                                                    batch_size,
                                                    decoder=decoder)

            if a_at_1 > max_a_at_1:
                max_a_at_1 = a_at_1
                print("[INFO] New record! accuracy@1: %.4f" % a_at_1)

            # Models are checkpointed on a new accuracy@5 record only.
            if a_at_5 > max_a_at_5:
                max_a_at_5 = a_at_5
                print("[INFO] New record! accuracy@5: %.4f" % a_at_5)
                if args.save == 'y':
                    torch.save(encoder.state_dict(), 'encoder-max.model')
                    torch.save(decoder.state_dict(), 'decoder-max.model')
                    print("[INFO] new model saved")

            # Separate checkpoint pair tracks the best BLEU score.
            bleu = ev.evaluateRandomly(encoder, decoder, train_data, vocab,
                                       batch_size)
            if bleu > max_bleu:
                max_bleu = bleu
                if args.save == 'y':
                    torch.save(encoder.state_dict(), 'encoder-max-bleu.model')
                    torch.save(decoder.state_dict(), 'decoder-max-bleu.model')
                    print("[INFO] new model saved")

    print("Done! max accuracy@5: %.4f, max accuracy@1: %.4f" %
          (max_a_at_5, max_a_at_1))
    print("max bleu: %.2f" % max_bleu)
    # Always save the final (last-epoch) weights as well.
    if args.save == 'y':
        torch.save(encoder.state_dict(), 'encoder-last.model')
        torch.save(decoder.state_dict(), 'decoder-last.model')
Example #3
0
    # NOTE(review): fragment — the enclosing function's header (and the
    # parser construction above) is outside this excerpt.
    args = parser.parse_args()

    # NOTE(review): writes the module-level batch_size — presumably read by
    # other helpers in the original module; confirm before refactoring.
    global batch_size
    batch_size = args.batch_size
    hidden_size = args.hidden_size
    w_embed_size = args.w_embed_size

    train_file = 'data/train_data_nv.txt'

    # Vocabulary is built from the training file itself.
    vocab = Vocab()
    vocab.build(train_file)

    if args.pre_trained_embed == 'n':
        # Randomly initialized embeddings.
        encoder = Encoder(vocab.n_words, w_embed_size, hidden_size,
                          batch_size).to(device)
        decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size,
                                   batch_size).to(device)
        # decoder = Decoder(vocab.n_words, w_embed_size, hidden_size, batch_size).to(device)
    else:
        # load pre-trained embedding
        weight = vocab.load_weight(path="data/komoran_hd_2times.vec")
        encoder = Encoder(vocab.n_words, w_embed_size, hidden_size, batch_size,
                          weight).to(device)
        decoder = AttentionDecoder(vocab.n_words, w_embed_size, hidden_size,
                                   batch_size, weight).to(device)
        # decoder = Decoder(vocab.n_words, w_embed_size, hidden_size, batch_size, weight).to(device)

    # Optionally warm-start either model from a checkpoint path.
    if args.encoder:
        encoder.load_state_dict(torch.load(args.encoder))
        print("[INFO] load encoder with %s" % args.encoder)
    if args.decoder:
        decoder.load_state_dict(torch.load(args.decoder))
        # NOTE(review): fragment is cut off here in the excerpt.
 def test_forward(self):
     """Numerically gradient-check AttentionDecoder.forward.

     Fix: the local was named ``input``, shadowing the Python builtin;
     renamed to ``inputs`` (gradcheck expects a tuple of inputs).
     """
     inputs = (Variable(torch.randn(3, 2, 4).double(), requires_grad=True),)
     # gradcheck compares analytical vs. numerical gradients in float64.
     test = gradcheck(AttentionDecoder(2, 4, 1).double(), inputs, eps=1e-6, atol=1e-4)
     print(test)
 def test_forward_dimensions(self):
     """Check that AttentionDecoder.forward returns a rank-3 tensor.

     Fix: the local was named ``input``, shadowing the Python builtin;
     renamed to ``inputs``.
     """
     inputs = Variable(torch.randn(3, 2, 4).double(), requires_grad=True)
     output = AttentionDecoder(2, 4, 1).double().forward(inputs)
     assert len(output.size()) == 3
Example #6
0
    ############### model, optimizer ########################
    # NOTE(review): fragment — the enclosing function's header is outside
    # this excerpt and the SimpleDecoder call below is truncated.
    logging.info("loading model and optimizer...")
    # Select compute device; log GPU count when CUDA is available.
    if torch.cuda.is_available():
        device = torch.device("cuda")
        logging.info("using {} GPU(s)".format(torch.cuda.device_count()))
    else:
        device = torch.device("cpu")
        logging.info("using CPU")
    # Choose decoder variant from config: attention-based or simple.
    if CONFIG.hyperparam.attention:
        model = AttentionDecoder(
            feature_dim=CONFIG.hyperparam.feature.dim,
            emb_dim=CONFIG.hyperparam.rnn.word_emb_dim,
            memory_dim=CONFIG.hyperparam.rnn.memory_dim,
            vocab_size=len(tokenizer),
            max_seqlen=CONFIG.hyperparam.tokenizer.max_len,
            dropout_p=CONFIG.hyperparam.rnn.dropout_prob,
            ss_prob=CONFIG.hyperparam.rnn.scheduled_sampling_prob,
            bos_idx=tokenizer.bosidx,
            pad_idx=tokenizer.padidx,
        )
    else:
        model = SimpleDecoder(
            feature_dim=CONFIG.hyperparam.feature.dim,
            emb_dim=CONFIG.hyperparam.rnn.word_emb_dim,
            memory_dim=CONFIG.hyperparam.rnn.memory_dim,
            vocab_size=len(tokenizer),
            max_seqlen=CONFIG.hyperparam.tokenizer.max_len,
            dropout_p=CONFIG.hyperparam.rnn.dropout_prob,
            ss_prob=CONFIG.hyperparam.rnn.scheduled_sampling_prob,
            bos_idx=tokenizer.bosidx,
            # NOTE(review): call truncated here in the excerpt.
Example #7
0
    # NOTE(review): fragment — this indented tail belongs to a progress-logging
    # function whose header is outside this excerpt.
    s = '{} {:.0f}% {} {:.4f}'.format(step, 100 * step / n_steps, since(start),
                                      loss)
    print(s)


# Training-loop hyperparameters.
n_steps = 75000
print_every = 1000
plot_every = 100

teacher_forcing_ratio = 0.5
max_len = max_len + 1  # an <EOS> token is appended at the end of every sentence
hidden_size = 256

if __name__ == '__main__':
    encoder = Encoder(source_lang_dict.n_words, hidden_size)
    decoder = AttentionDecoder(hidden_size, target_lang_dict.n_words, max_len)

    # Start-of-sequence token as a CUDA tensor; EOS kept as a plain index.
    sos_var = Variable(
        torch.cuda.LongTensor([target_lang_dict.word2idx['SOS']]))
    eos_idx = target_lang_dict.word2idx['EOS']

    # Separate SGD optimizers for the two networks.
    e_optimizer = torch.optim.SGD(encoder.parameters(), lr=1e-2)
    d_optimizer = torch.optim.SGD(decoder.parameters(), lr=1e-2)
    loss_fn = torch.nn.functional.nll_loss

    encoder.cuda()
    decoder.cuda()

    # Running loss accumulators for the print/plot intervals above.
    print_loss = 0
    plot_loss = 0
    start = time.time()
    # NOTE(review): the script body is cut off here in the excerpt.
    def train(self):
        """Run the full training loop for the Seq2Seq model.

        Builds the model from this trainer's encoder/decoder parameter
        dicts, trains with Adam and a scheduled teacher-forcing ratio,
        logs train/val metrics to ``self.writer``, periodically saves
        checkpoints (and attention plots), and saves a checkpoint on
        KeyboardInterrupt.
        """
        encoder_parameter = self.encoder_parameter()
        decoder_parameter = self.decoder_parameter()

        encoder = Encoder(**encoder_parameter)
        decoder = AttentionDecoder(**decoder_parameter)
        model = Seq2Seq(encoder, decoder, self.args.sequence_size,
                        self.args.get_attention)
        model.train()
        model.to(device)

        optimizer = opt.Adam(model.parameters(), lr=self.args.learning_rate)

        # Pre-compute a teacher-forcing ratio schedule over all steps.
        epoch_step = len(self.train_loader) + 1
        total_step = self.args.epochs * epoch_step
        teacher_forcing_ratios = self.cal_teacher_forcing_ratio(total_step)

        step = 0
        attention = None

        for epoch in range(self.args.epochs):
            for i, data in enumerate(self.train_loader, 0):
                try:
                    src_input, trg_input, trg_output = data

                    # NOTE(review): the schedule is sized by total_step but
                    # indexed by the within-epoch batch index ``i`` (resets
                    # each epoch) rather than the global ``step`` — confirm
                    # whether this is intentional.
                    if self.args.get_attention:
                        output, attention = model(
                            src_input,
                            trg_input,
                            teacher_forcing_rate=teacher_forcing_ratios[i])
                    else:
                        output = model(
                            src_input,
                            trg_input,
                            teacher_forcing_rate=teacher_forcing_ratios[i])

                    # Get loss & accuracy
                    loss, accuracy = self.loss_accuracy(output, trg_output)

                    # Training Log
                    if step % self.args.train_step_print == 0:
                        self.writer.add_scalar('train/loss', loss.item(), step)
                        self.writer.add_scalar('train/accuracy',
                                               accuracy.item(), step)

                        print(
                            '[Train] epoch : {0:2d}  iter: {1:4d}/{2:4d}  step : {3:6d}/{4:6d}  '
                            '=>  loss : {5:10f}  accuracy : {6:12f}'.format(
                                epoch, i, epoch_step, step, total_step,
                                loss.item(), accuracy.item()))

                    # Validation Log
                    if step % self.args.val_step_print == 0:
                        # no_grad: validation must not accumulate gradients.
                        with torch.no_grad():
                            val_loss, val_accuracy = self.val(
                                model,
                                teacher_forcing_ratio=teacher_forcing_ratios[i]
                            )
                            self.writer.add_scalar('val/loss', val_loss, step)
                            self.writer.add_scalar('val/accuracy',
                                                   val_accuracy, step)

                            print(
                                '[ Val ] epoch : {0:2d}  iter: {1:4d}/{2:4d}  step : {3:6d}/{4:6d}  '
                                '=>  loss : {5:10f}  accuracy : {6:12f}'.
                                format(epoch, i, epoch_step, step, total_step,
                                       val_loss, val_accuracy))

                    # Save Model Point
                    if step % self.args.step_save == 0:
                        if self.args.get_attention:
                            self.plot_attention(step, src_input, trg_input,
                                                attention)
                        self.model_save(model=model,
                                        optimizer=optimizer,
                                        epoch=epoch,
                                        step=step)

                    # Optimizer
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    step += 1

                # If KeyBoard Interrupt Save Model
                # NOTE(review): after saving, the loop continues with the
                # next batch rather than exiting — confirm this is intended.
                except KeyboardInterrupt:
                    self.model_save(model=model,
                                    optimizer=optimizer,
                                    epoch=epoch,
                                    step=step)
Example #9
0
    # NOTE(review): fragment — the enclosing function's header is outside this
    # excerpt, and it continues past the end of it.
    hidden_size = 512
    embed_size = 256
    print(device)

    print('Loading dataset ......')
    # DE/EN are the source/target field objects returned by the loader.
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print(
        "[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)\t[VALUATE]:%d (dataset:%d)"
        % (len(train_iter), len(train_iter.dataset), len(test_iter),
           len(test_iter.dataset), len(val_iter), len(val_iter.dataset)))
    print("[DE_vocab]:%d [en_vocab]:%d" % (de_size, en_size))

    print("Initialize model ......")
    encoder = Encoder(de_size, embed_size, hidden_size)
    decoder = AttentionDecoder(en_size, embed_size, hidden_size)
    seq2seq = Seq2Seq(encoder, decoder).to(device)
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for epoch in range(0, args.epochs):
        train(seq2seq, optimizer, train_iter, en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
        now_time = time.time()
        # math.exp(val_loss) reports validation perplexity.
        print("[Epoch:{}] val_loss:{} | val_pp:{} | Time: {}h{}m{}s".format(
            epoch, val_loss, math.exp(val_loss),
            (now_time - start_time) // 3600,
            (now_time - start_time) % 3600 // 60,
            (now_time - start_time) % 60))