def load_model(vocab_size):
    config = {
        "rnn_dim": 128,
        "embedding_dim": 128,
        "encoder_layers": 2,
        "bidirectional": False,
        "decoder_layers": 2
    }
    model = models.Seq2Seq(vocab_size, config)
    return model
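# A minimal usage sketch, not from the original code. It assumes the vocabulary
# size is derived from a fitted Keras Tokenizer, as in the _build() snippet that
# follows; `texts` is a placeholder corpus and the +2 offset is an assumption
# about how many special-token ids the model reserves.
import tensorflow as tf

texts = ["one short example sentence", "another short example sentence"]
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=20000)
tokenizer.fit_on_texts(texts)
vocab_size = min(20000, len(tokenizer.word_index)) + 2
model = load_model(vocab_size)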
def _build(self):
    # Cap the vocabulary at num_words; +2 presumably reserves ids for special tokens (padding/OOV).
    self.vocab_size = min(self.num_words, len(self.tokenizer.word_index)) + 2
    self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.embedding_dims)
    self.encoder = models.BidirectionalMultiLayersEncoder(
        self.encoder_config, self.rnn_units, self.drop_prob, return_sequences=True)
    # Decoder width is doubled to match the concatenated outputs of the bidirectional encoder.
    self.decoder = models.MultiLayersDecoder(
        self.decoder_config, self.rnn_units * 2, self.vocab_size, self.drop_prob)
    self.seq2seq = models.Seq2Seq(
        self.embedding, self.encoder, self.decoder,
        if_use_attention=self.if_use_attention,
        trainable_embedding=self.trainable_embedding)
def pick_model(args, dicts):
    """Use args to initialize the appropriate model."""
    Y = get_num_labels(args.Y)
    if args.model == "rnn":
        model = models.VanillaRNN(Y, args.embed_file, dicts, args.rnn_dim, args.cell_type,
                                  args.rnn_layers, args.gpu, args.embed_size, args.bidirectional)
    elif args.model == "cnn_vanilla":
        filter_size = int(args.filter_size)
        model = models.VanillaConv(Y, args.embed_file, filter_size, args.num_filter_maps,
                                   args.gpu, dicts, args.embed_size, args.dropout)
    elif args.model == "conv_attn":
        filter_size = int(args.filter_size)
        model = models.ConvAttnPool(Y, args.embed_file, filter_size, args.num_filter_maps,
                                    args.lmbda, args.gpu, dicts,
                                    embed_size=args.embed_size, dropout=args.dropout)
    elif args.model == "rnn_attn":
        encoder = models.Encoder(Y, args.embed_file, dicts, embed_size=args.embed_size)
        decoder = models.Decoder(Y, args.embed_file, dicts)
        model = models.Seq2Seq(encoder, decoder, Y, args.embed_file, dicts)
    elif args.model == "saved":
        model = torch.load(args.test_model)
    if args.gpu:
        model.cuda()
    return model
N_LAYERS = config.N_LAYERS
ENC_DROPOUT = config.ENC_DROPOUT
DEC_DROPOUT = config.DEC_DROPOUT

encoder = models.encoderRNN(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
# encoder = nn.DataParallel(encoder).cuda()
encoder.cuda()
decoder = models.decoderRNN(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT)
# decoder = nn.DataParallel(decoder).cuda()
decoder.cuda()
print(encoder)
print(decoder)

model = models.Seq2Seq(encoder, decoder, load=True)

# Define the optimizer and the parameters it updates
# params = list(model.encoder.module.parameters()) + list(model.decoder.module.parameters())
params = list(model.encoder.parameters()) + list(model.decoder.parameters())
optimizer = optim.Adagrad(params, lr=0.15, initial_accumulator_value=0.1)
TRG_PAD_IDX = vocab.stoi['<pad>']


def epoch_time(start_time, end_time):
    """Convert an elapsed time in seconds into (minutes, seconds)."""
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs
ARTICLE, TITLE, train, test = read_data()

INPUT_DIM = len(ARTICLE.vocab)
OUTPUT_DIM = len(TITLE.vocab)
ENC_EMB_DIM = 512
DEC_EMB_DIM = 512
ENC_HID_DIM = 256
DEC_HID_DIM = 256
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
PAD_IDX = ARTICLE.vocab.stoi['<pad>']
SOS_IDX = TITLE.vocab.stoi['<sos>']
EOS_IDX = TITLE.vocab.stoi['<eos>']

attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
encoder = models.Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
decoder = models.Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
model = models.Seq2Seq(encoder, decoder, device, PAD_IDX, SOS_IDX, EOS_IDX).to(device)

# Load the trained weights for the title-generation model.
weight = th.load("save_model/title_generate.pth")
model.load_state_dict(weight)

# Japanese news article used as the source text (it reports that former AKB48 member
# Rumi Yonezawa announced on Twitter that she joined the agency M's Enterprise and
# plans to focus on acting). Kept in Japanese because the model expects Japanese input.
article = "7月2日,プライベート写真が流出したことでAKB48としての活動を辞退した米沢瑠美が、新しい事務所が決まったことを自身のツイッターで明かした。米沢は7月1日、「みんなに早く伝えたかった事を、話せる準備が整ってきましたっ☆ まず、所属事務所のご報告。エムズエンタープライズさんに所属することになりました☆」と報告。今年3月いっぱいで所属事務所との契約が満了したため、約2年間続いたブログを閉鎖することとなった米沢だが、今回事務所が決まったことで、新たなオフィシャルブログを製作中。今月中旬頃にはスタートする予定だという。また、「これからは演技のお仕事を中心に頑張っていきたいと思っております(^^)」と今後の方針を示唆。どんどん活動の場を広げると思われる米沢から、今後も目が離せそうにない。"
idx_ = 90  # index in train.examples of the title corresponding to this article
# article = "".join(vars(train.examples[idx_])["article"])
true = "".join(vars(train.examples[idx_])["title"])

pred_title, attention = generate(model, article)
print("".join(article))
print("[predict]", "".join(pred_title))
print("[true]", true)
display_attention(article, pred_title, attention)
def main():
    # Get arguments
    args = parse_args()

    # Set random seed
    torch.manual_seed(args.seed)

    # CUDA
    use_cuda = False
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you "
                  "should probably run with --cuda")
        else:
            use_cuda = True
            torch.cuda.manual_seed(args.seed)

    # Load data + text fields
    print('=' * 89)
    train_iter, val_iter, test_iter, SRC, TRG = utils.load_dataset(
        batch_size=args.batch_size,
        use_pretrained_emb=args.pretrained_emb,
        save_dir=SAVE_DIR)
    print('=' * 89)

    # Initialize model
    enc = models.EncoderRNN(
        input_size=len(SRC.vocab),
        emb_size=(SRC.vocab.vectors.size(1)
                  if args.pretrained_emb == 'fastText' else args.emb_size),
        embeddings=(SRC.vocab.vectors
                    if args.pretrained_emb == 'fastText' else None),
        max_norm=args.emb_maxnorm,
        padding_idx=SRC.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=args.bidirectional)
    decoder = models.AttnDecoderRNN if args.attention else models.DecoderRNN
    dec = decoder(
        enc_num_directions=enc.num_directions,
        enc_hidden_size=args.hidden_size,
        use_context=args.use_context,
        input_size=len(TRG.vocab),
        emb_size=(TRG.vocab.vectors.size(1)
                  if args.pretrained_emb else args.emb_size),
        embeddings=(TRG.vocab.vectors if args.pretrained_emb else None),
        max_norm=args.emb_maxnorm,
        padding_idx=TRG.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=False)  # args.bidirectional
    model = models.Seq2Seq(enc, dec, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    print(model)

    # Initialize loss
    criterion = torch.nn.CrossEntropyLoss(ignore_index=TRG.vocab.stoi["<pad>"])

    # Create optimizer
    if args.optimizer == 'Adam':
        optim = torch.optim.Adam
    elif args.optimizer == 'Adadelta':
        optim = torch.optim.Adadelta
    elif args.optimizer == 'Adagrad':
        optim = torch.optim.Adagrad
    else:
        optim = torch.optim.SGD
    optimizer = optim(model.parameters(), lr=args.lr)

    # Create scheduler: halve the learning rate once the epoch count passes 8
    lambda_lr = lambda epoch: 0.5 if epoch > 8 else 1
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda_lr)

    # Train
    best_val_loss = None
    fname = './{}/{}.pt'.format(SAVE_DIR, args.save)
    print('=' * 89)
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            attns = train(epoch, model, train_iter, criterion, optimizer,
                          use_cuda, args, SRC, TRG)
            val_loss = evaluate(model, val_iter, criterion, use_cuda)

            # Log results
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s '
                  '| valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                      epoch, (time.time() - epoch_start_time),
                      val_loss, math.exp(val_loss)))
            print('-' * 89)

            # Save the model if validation loss is the best we've seen so far
            if not best_val_loss or val_loss < best_val_loss:
                if not os.path.isdir(SAVE_DIR):
                    os.makedirs(SAVE_DIR)
                torch.save(model, fname)
                best_val_loss = val_loss

            # Anneal learning rate
            scheduler.step()
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model
    with open(fname, 'rb') as f:
        model = torch.load(f)

    # Run on test data
    test_loss = evaluate(model, test_iter, criterion, use_cuda)

    # Log results
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
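# Assumed entry point, not shown in the original snippet: run main() when the
# script is executed directly.
if __name__ == '__main__':
    main()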
ENC_HID_DIM = config.ENC_HID_DIM
DEC_HID_DIM = config.DEC_HID_DIM
N_LAYERS = config.N_LAYERS
ENC_DROPOUT = config.ENC_DROPOUT
DEC_DROPOUT = config.DEC_DROPOUT

encoder = models.encoderRNN(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
# encoder = nn.DataParallel(encoder).cuda()
encoder.cuda()
decoder = models.decoderRNN(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT)
# decoder = nn.DataParallel(decoder).cuda()
decoder.cuda()

model = models.Seq2Seq(encoder, decoder, load=False)

# Define the optimizer and the parameters it updates
# params = list(model.encoder.module.parameters()) + list(model.decoder.module.parameters())
params = list(model.encoder.parameters()) + list(model.decoder.parameters())
optimizer = optim.Adagrad(params, lr=0.15, initial_accumulator_value=0.1)
TRG_PAD_IDX = vocab.stoi['<pad>']
criterion = nn.CrossEntropyLoss()


def epoch_time(start_time, end_time):
    """Convert an elapsed time in seconds into (minutes, seconds)."""
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs
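# A minimal training-loop sketch showing how the optimizer, criterion, and
# epoch_time() above are typically wired together. train(), evaluate(),
# train_iterator, valid_iterator, N_EPOCHS, and CLIP are assumptions, not
# definitions from the original script.
import time

import torch

N_EPOCHS = 10   # assumed number of epochs
CLIP = 2.0      # assumed gradient-clipping value passed to train()

best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    start_time = time.time()
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'seq2seq-best.pt')  # assumed checkpoint path

    print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')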