def load_model(path, vocab_size, model_class='GRU'):
    """
    Load a model from saved states.
    From: https://pytorch.org/tutorials/beginner/saving_loading_models.html

    Default model parameters:
    --model=RNN --optimizer=SGD --initial_lr=1.0 --batch_size=128 --seq_len=35
        --hidden_size=512 --num_layers=2 --dp_keep_prob=0.8 --num_epochs=20 --save_best
    --model=GRU --optimizer=ADAM --initial_lr=0.001 --batch_size=128 --seq_len=35
        --hidden_size=512 --num_layers=2 --dp_keep_prob=0.5 --num_epochs=20 --save_best

    :return: the model with the saved parameters loaded
    """
    # Default hyperparameters (see docstring above)
    if model_class == 'GRU':
        model = GRU(emb_size=200, hidden_size=512, seq_len=35, batch_size=128,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.5)
    else:
        model = RNN(emb_size=200, hidden_size=512, seq_len=35, batch_size=128,
                    vocab_size=vocab_size, num_layers=2, dp_keep_prob=0.8)
    model.load_state_dict(torch.load(path))
    return model
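# Usage sketch (illustrative only): assumes `torch` and the RNN/GRU model
# classes are imported at the top of this file, and that the path below is
# replaced with a real checkpoint such as the ones referenced further down.
#
#     gru = load_model('path/to/best_params.pt', vocab_size=10000, model_class='GRU')
#     gru.eval()  # disable dropout before evaluation or generation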
###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # Also, the Transformer has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
                            n_blocks=args.num_layers, dropout=1.-args.dp_keep_prob)
    # These 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch.
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
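# For illustration only (not part of the original setup): with the defaults
# --hidden_size=512, --num_layers=2 and --dp_keep_prob=0.5, the Transformer
# branch above is equivalent to
#
#     TRANSFORMER(vocab_size=vocab_size, n_units=512, n_blocks=2, dropout=0.5)
#
# since the Transformer takes a dropout probability while the RNN/GRU take a
# keep probability (dropout = 1. - dp_keep_prob).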
if args.model == "RNN":
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=10,
                vocab_size=10000,
                num_layers=args.num_layers,
                dp_keep_prob=0.8)
    model.load_state_dict(
        torch.load(
            "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_0/best_params.pt"
        ))
elif args.model == "GRU":
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=10,
                vocab_size=10000,
                num_layers=args.num_layers,
                dp_keep_prob=0.5)
    model.load_state_dict(
        torch.load(
            "GRU_ADAM_model=GRU_optimizer=ADAM_initial_lr=0.001_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.5_num_epochs=20_save_best_0/best_params.pt"
        ))

#print(model.out_layer.weight.data)

# toy
#model.load_state_dict(torch.load("RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"))

model.eval()
#print(model.out_layer.weight.data)
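# Note (sketch, not part of the original script): the two torch.load calls
# above will fail on a CPU-only machine if the checkpoints were saved on GPU;
# in that case pass a map_location, e.g.
#
#     state = torch.load(checkpoint_path, map_location='cpu')
#     model.load_state_dict(state)
#
# (the generation script below already loads with map_location='cpu').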
# VOCAB_SIZE = 10000
# NUM_LAYERS = 2
# DP_KEEP_PROB = 0.5
# SEQ_LEN = 35
GENERATED_SEQ_LEN = 34

#--------------- LOAD MODEL
load_path = os.path.join(MODEL_PATH, 'best_params.pt')
model = GRU(emb_size=200, hidden_size=512, seq_len=35, batch_size=128,
            vocab_size=10000, num_layers=2, dp_keep_prob=0.5)
model.load_state_dict(torch.load(load_path, map_location='cpu'))
hidden = model.init_hidden()
model.eval()

#--------------- GENERATE SAMPLES
# first_words must be defined before generate() is called: one word id per batch element.
first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000)
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS
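# Sketch of the conversion step (assumptions: `samples` holds token ids with
# shape (generated_seq_len, batch_size), and `id_2_word` is a dict mapping ids
# back to strings, e.g. built by the data loader; both names are assumptions,
# adapt them to the actual objects in this script):
#
#     for i in range(samples.size(1)):
#         words = [id_2_word[int(idx)] for idx in samples[:, i]]
#         print(' '.join(words))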