Пример #1
0
def load_model(vocab_size):
    """Build a ``models.Seq2Seq`` for *vocab_size* using fixed settings.

    The settings dict (RNN/embedding width 128, two encoder layers, two
    decoder layers, unidirectional) is passed as the second positional
    argument to ``models.Seq2Seq``.
    """
    settings = dict(
        rnn_dim=128,
        embedding_dim=128,
        encoder_layers=2,
        bidirectional=False,
        decoder_layers=2,
    )
    return models.Seq2Seq(vocab_size, settings)
Пример #2
0
    def _build(self):
        """Create the embedding, encoder, decoder and seq2seq components.

        ``vocab_size`` is the smaller of ``num_words`` and the size of the
        tokenizer's word index, plus 2. The results are stored on ``self`` as
        ``vocab_size``, ``embedding``, ``encoder``, ``decoder`` and
        ``seq2seq``.
        """
        capped_words = min(self.num_words, len(self.tokenizer.word_index))
        self.vocab_size = capped_words + 2

        self.embedding = tf.keras.layers.Embedding(
            self.vocab_size, self.embedding_dims)

        self.encoder = models.BidirectionalMultiLayersEncoder(
            self.encoder_config, self.rnn_units, self.drop_prob,
            return_sequences=True)

        # The decoder is given twice the encoder's rnn_units -- presumably to
        # match a bidirectional encoder's concatenated states; confirm in
        # models.MultiLayersDecoder.
        self.decoder = models.MultiLayersDecoder(
            self.decoder_config, self.rnn_units * 2, self.vocab_size,
            self.drop_prob)

        seq2seq_options = dict(
            if_use_attention=self.if_use_attention,
            trainable_embedding=self.trainable_embedding,
        )
        self.seq2seq = models.Seq2Seq(
            self.embedding, self.encoder, self.decoder, **seq2seq_options)
Пример #3
0
def pick_model(args, dicts):
    """
        Use args to initialize the appropriate model.

        ``args.model`` selects the architecture: "rnn", "cnn_vanilla",
        "conv_attn", "rnn_attn", or "saved" (load a pickled model from
        ``args.test_model``). The model is moved to the GPU when ``args.gpu``
        is truthy.

        Raises:
            ValueError: if ``args.model`` is not one of the names above.
    """
    Y = get_num_labels(args.Y)
    if args.model == "rnn":
        model = models.VanillaRNN(Y, args.embed_file, dicts, args.rnn_dim,
                                  args.cell_type, args.rnn_layers, args.gpu,
                                  args.embed_size, args.bidirectional)
    elif args.model == "cnn_vanilla":
        filter_size = int(args.filter_size)
        model = models.VanillaConv(Y, args.embed_file, filter_size,
                                   args.num_filter_maps, args.gpu, dicts,
                                   args.embed_size, args.dropout)
    elif args.model == "conv_attn":
        filter_size = int(args.filter_size)
        model = models.ConvAttnPool(Y,
                                    args.embed_file,
                                    filter_size,
                                    args.num_filter_maps,
                                    args.lmbda,
                                    args.gpu,
                                    dicts,
                                    embed_size=args.embed_size,
                                    dropout=args.dropout)
    elif args.model == "rnn_attn":
        encoder = models.Encoder(Y,
                                 args.embed_file,
                                 dicts,
                                 embed_size=args.embed_size)
        decoder = models.Decoder(Y, args.embed_file, dicts)
        model = models.Seq2Seq(encoder, decoder, Y, args.embed_file, dicts)
    elif args.model == "saved":
        # NOTE: torch.load unpickles arbitrary objects; only point
        # args.test_model at files from a trusted source.
        model = torch.load(args.test_model)
    else:
        # Previously an unknown name fell through and failed later with an
        # UnboundLocalError on `model`; fail early with a clear message.
        raise ValueError("unknown model type: {!r}".format(args.model))
    if args.gpu:
        model.cuda()
    return model
Пример #4
0
# Settings read from the project `config` module. N_LAYERS is not referenced
# in the visible part of this script.
N_LAYERS = config.N_LAYERS
ENC_DROPOUT = config.ENC_DROPOUT
DEC_DROPOUT = config.DEC_DROPOUT

# Build the encoder and move it to the GPU (calls .cuda() unconditionally).
encoder = models.encoderRNN(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                            ENC_DROPOUT)
# Disabled multi-GPU variant:
#encoder = nn.DataParallel(encoder).cuda()
encoder.cuda()
# Build the decoder and move it to the GPU as well.
decoder = models.decoderRNN(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                            DEC_DROPOUT)
# Disabled multi-GPU variant:
#decoder = nn.DataParallel(decoder).cuda()
decoder.cuda()
# Print both module structures for inspection.
print(encoder)
print(decoder)

# load=True presumably restores previously saved weights -- confirm in
# models.Seq2Seq.
model = models.Seq2Seq(encoder, decoder, load=True)

# Optimizer setup: a single Adagrad instance over encoder + decoder parameters.
# The commented line is the DataParallel form (parameters live under .module).
#params= list(model.encoder.module.parameters())+list(model.decoder.module.parameters())
params = list(model.encoder.parameters()) + list(model.decoder.parameters())
optimizer = optim.Adagrad(params, lr=0.15, initial_accumulator_value=0.1)
# Index of the '<pad>' token in the vocabulary.
TRG_PAD_IDX = vocab.stoi['<pad>']


def epoch_time(start_time, end_time):
    """Split the span between two timestamps into whole minutes and seconds.

    Both arguments are numbers in seconds. Returns a ``(minutes, seconds)``
    tuple of ints, each truncated toward zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - whole_minutes * 60)
    return whole_minutes, leftover_seconds

Пример #5
0
# Load the article/title fields and the train/test datasets.
ARTICLE, TITLE, train,test = read_data()

# Vocabulary sizes for the source (article) and target (title) sides.
INPUT_DIM = len(ARTICLE.vocab)
OUTPUT_DIM = len(TITLE.vocab)

# Model dimensions and dropout rates passed to the encoder/decoder below;
# load_state_dict further down requires them to match the saved checkpoint.
ENC_EMB_DIM = 512
DEC_EMB_DIM = 512
ENC_HID_DIM = 256
DEC_HID_DIM = 256
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
# Special-token indices: padding from the article vocab, start/end-of-sequence
# from the title vocab.
PAD_IDX = ARTICLE.vocab.stoi['<pad>']
SOS_IDX = TITLE.vocab.stoi['<sos>']
EOS_IDX = TITLE.vocab.stoi['<eos>']
# Attention module shared with the decoder.
attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
encoder = models.Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
decoder = models.Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)

# Assemble the model on `device` and restore the trained weights from disk.
model = models.Seq2Seq(encoder, decoder, device,PAD_IDX,SOS_IDX,EOS_IDX).to(device)
weigth = th.load("save_model/title_generate.pth")
model.load_state_dict(weigth)
# Hard-coded input article used for this demo run.
article = "7月2日,プライベート写真が流出したことでAKB48としての活動を辞退した米沢瑠美が、新しい事務所が決まったことを自身のツイッターで明かした。米沢は7月1日、「みんなに早く伝えたかった事を、話せる準備が整ってきましたっ☆ まず、所属事務所のご報告。エムズエンタープライズさんに所属することになりました☆」と報告。今年3月いっぱいで所属事務所との契約が満了したため、約2年間続いたブログを閉鎖することとなった米沢だが、今回事務所が決まったことで、新たなオフィシャルブログを製作中。今月中旬頃にはスタートする予定だという。また、「これからは演技のお仕事を中心に頑張っていきたいと思っております(^^)」と今後の方針を示唆。どんどん活動の場を広げると思われる米沢から、今後も目が離せそうにない。"
idx_ = 90 # index of the title that corresponds to the article
# Alternative (disabled): take the article text from the same training example.
#article = "".join(vars(train.examples[idx_])["article"])
# Reference title, joined from the tokens of training example idx_.
true = "".join(vars(train.examples[idx_])["title"])
# Generate a title and the attention returned alongside it.
pred_title, attention = generate(model,article)
print("".join(article))
print("[predict]","".join(pred_title))
print("[true]",true)
# Display the attention for the article and the predicted title.
display_attention(article,pred_title,attention)
Пример #6
0
def main():
    """Train a seq2seq model, keep the best checkpoint, and report test loss.

    Parses command-line arguments, seeds the random number generators, loads
    the data iterators and vocabulary fields, builds the encoder, decoder and
    ``models.Seq2Seq`` wrapper, then trains for ``args.epochs`` epochs. The
    whole model is saved under ``SAVE_DIR`` each time the validation loss
    improves. A ``KeyboardInterrupt`` stops training early. Finally the saved
    model is reloaded and evaluated on the test set.
    """
    # Get arguments
    args = parse_args()

    # Set random seed
    torch.manual_seed(args.seed)

    # Cuda
    use_cuda = False
    if torch.cuda.is_available():
        if not args.cuda:
            # Adjacent literals instead of a backslash continuation inside the
            # string, which embedded the source indentation in the message.
            print("WARNING: You have a CUDA device, so you "
                  "should probably run with --cuda")
        else:
            use_cuda = True
            torch.cuda.manual_seed(args.seed)

    # Load data + text fields
    print('=' * 89)
    train_iter, val_iter, test_iter, SRC, TRG = utils.load_dataset(
        batch_size=args.batch_size,
        use_pretrained_emb=args.pretrained_emb,
        save_dir=SAVE_DIR
    )
    print('=' * 89)

    # Initialize model
    enc = models.EncoderRNN(
        input_size=len(SRC.vocab),
        emb_size=(SRC.vocab.vectors.size(1)
                  if args.pretrained_emb == 'fastText'
                  else args.emb_size),
        embeddings=(SRC.vocab.vectors
                    if args.pretrained_emb == 'fastText'
                    else None),
        max_norm=args.emb_maxnorm,
        padding_idx=SRC.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=args.bidirectional
    )
    # NOTE(review): the encoder uses pretrained vectors only when
    # args.pretrained_emb == 'fastText', while the decoder uses them for any
    # truthy value -- confirm this asymmetry is intended.
    decoder = models.AttnDecoderRNN if args.attention else models.DecoderRNN
    dec = decoder(
        enc_num_directions=enc.num_directions,
        enc_hidden_size=args.hidden_size,
        use_context=args.use_context,
        input_size=len(TRG.vocab),
        emb_size=(TRG.vocab.vectors.size(1)
                  if args.pretrained_emb
                  else args.emb_size),
        embeddings=(TRG.vocab.vectors
                    if args.pretrained_emb
                    else None),
        max_norm=args.emb_maxnorm,
        padding_idx=TRG.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=False # args.bidirectional
    )
    model = models.Seq2Seq(enc, dec, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    print(model)

    # Initialize loss; padding positions in the target are ignored
    criterion = torch.nn.CrossEntropyLoss(
        ignore_index=TRG.vocab.stoi["<pad>"])

    # Create optimizer (any unrecognized name falls back to SGD)
    if args.optimizer == 'Adam':
        optim = torch.optim.Adam
    elif args.optimizer == 'Adadelta':
        optim = torch.optim.Adadelta
    elif args.optimizer == 'Adagrad':
        optim = torch.optim.Adagrad
    else:
        optim = torch.optim.SGD
    optimizer = optim(model.parameters(), lr=args.lr)

    # Create scheduler: LambdaLR multiplies the initial learning rate by the
    # returned factor, so the rate is halved once the scheduler's epoch
    # counter exceeds 8.
    def lr_factor(epoch):
        return 0.5 if epoch > 8 else 1

    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_factor)

    # Train
    best_val_loss = None
    fname = './{}/{}.pt'.format(SAVE_DIR, args.save)

    print('=' * 89)
    try:
        for epoch in range(1, args.epochs+1):
            epoch_start_time = time.time()

            # The value returned by train() was never used, so it is not kept.
            train(epoch, model, train_iter, criterion, optimizer,
                  use_cuda, args, SRC, TRG)
            val_loss = evaluate(model, val_iter, criterion, use_cuda)

            # Log results
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s '
                  '| valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                      epoch, (time.time() - epoch_start_time),
                      val_loss, math.exp(val_loss)))
            print('-' * 89)

            # Save the model if validation loss is best we've seen so far.
            # Compare against None explicitly so that a best loss of 0.0 is
            # not mistaken for "no best yet".
            if best_val_loss is None or val_loss < best_val_loss:
                os.makedirs(SAVE_DIR, exist_ok=True)
                torch.save(model, fname)
                best_val_loss = val_loss

            # Anneal learning rate
            scheduler.step()
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model.
    # NOTE: if training stopped before any checkpoint was written in this run,
    # this loads whatever already exists at fname or raises FileNotFoundError.
    with open(fname, 'rb') as f:
        model = torch.load(f)

    # Run on test data
    test_loss = evaluate(model, test_iter, criterion, use_cuda)

    # Log results
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
Пример #7
0
# Settings read from the project `config` module. N_LAYERS is not referenced
# in the visible part of this script.
ENC_HID_DIM = config.ENC_HID_DIM
DEC_HID_DIM = config.DEC_HID_DIM
N_LAYERS = config.N_LAYERS
ENC_DROPOUT = config.ENC_DROPOUT
DEC_DROPOUT = config.DEC_DROPOUT

# Build the encoder and move it to the GPU (calls .cuda() unconditionally).
encoder = models.encoderRNN(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                            ENC_DROPOUT)
# Disabled multi-GPU variant:
#encoder = nn.DataParallel(encoder).cuda()
encoder.cuda()
# Build the decoder and move it to the GPU as well.
decoder = models.decoderRNN(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                            DEC_DROPOUT)
# Disabled multi-GPU variant:
#decoder = nn.DataParallel(decoder).cuda()
decoder.cuda()

# load=False presumably starts from freshly initialized weights -- confirm in
# models.Seq2Seq.
model = models.Seq2Seq(encoder, decoder, load=False)

# Optimizer setup: a single Adagrad instance over encoder + decoder parameters.
# The commented line is the DataParallel form (parameters live under .module).
#params= list(model.encoder.module.parameters())+list(model.decoder.module.parameters())
params = list(model.encoder.parameters()) + list(model.decoder.parameters())
optimizer = optim.Adagrad(params, lr=0.15, initial_accumulator_value=0.1)
# Index of the '<pad>' token in the vocabulary.
TRG_PAD_IDX = vocab.stoi['<pad>']
# NOTE(review): TRG_PAD_IDX is computed above but not passed as ignore_index
# here, so padding positions count toward the loss unless they are masked
# elsewhere -- confirm this is intended.
criterion = nn.CrossEntropyLoss()


def epoch_time(start_time, end_time):
    """Return the elapsed time between two timestamps as (minutes, seconds).

    Both arguments are numbers in seconds; each component of the result is an
    int truncated toward zero.
    """
    total = end_time - start_time
    minutes = int(total / 60)
    return minutes, int(total - minutes * 60)