Example #1
File: q5.py  Project: lebrice/IFT6135
def get_best_model(model_type: str) -> nn.Module:
    model: nn.Module = None
    if model_type == 'RNN':
        model = RNN(emb_size=200,
                    hidden_size=1500,
                    seq_len=35,
                    batch_size=20,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_a/best_params.pt', map_location=device))
    elif model_type == 'GRU':
        model = GRU(emb_size=200,
                    hidden_size=1500,
                    seq_len=35,
                    batch_size=20,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_b/best_params.pt', map_location=device))
    elif model_type == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
        model.load_state_dict(
            torch.load('./4_1_c/best_params.pt', map_location=device))
    return model
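For context, a minimal usage sketch of get_best_model; it assumes the module-level vocab_size and device globals that q5.py relies on, plus the ./4_1_*/best_params.pt checkpoints referenced above.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = get_best_model('GRU')  # builds the GRU and loads ./4_1_b/best_params.pt
model.eval()                   # disable dropout before evaluation or sampling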
Example #2
def make_my_model(model_name, device, seq_len=35, batch_size=20, pt=None):
    #          --model=RNN --optimizer=ADAM --initial_lr=0.0001 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    #          --model=GRU --optimizer=SGD_LR_SCHEDULE --initial_lr=10 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    #          --model=TRANSFORMER --optimizer=SGD_LR_SCHEDULE --initial_lr=20 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=6 --dp_keep_prob=0.9 --save_best
    if model_name == 'RNN':
        model = RNN(emb_size=200,
                    hidden_size=1500,
                    seq_len=seq_len,
                    batch_size=batch_size,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'GRU':
        model = GRU(emb_size=200,
                    hidden_size=1500,
                    seq_len=seq_len,
                    batch_size=batch_size,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        # these 3 attributes don't affect the Transformer's computations;
        # they are only used in run_epoch
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
    else:
        raise ValueError("Model type not recognized.")
    # Model to device
    model = model.to(device)
    # Load pt
    if pt is not None:
        model.load_state_dict(torch.load(pt, map_location=device))
    return model
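A usage sketch mirroring the command lines quoted in the comments above; the checkpoint path is borrowed from Example #1 and may differ in practice (pass pt=None to get a freshly initialized model instead).

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
gru = make_my_model('GRU', device, seq_len=35, batch_size=20,
                    pt='./4_1_b/best_params.pt')  # assumed checkpoint path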
Example #3
def load_model(model_info,
               device,
               vocab_size,
               emb_size=200,
               load_on_device=True):
    params_path = model_info.get_params_path()

    if model_info.model == 'RNN':
        model = RNN(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    elif model_info.model == 'GRU':
        model = GRU(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    else:
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=model_info.hidden_size,
                            n_blocks=model_info.num_layers,
                            dropout=1. - model_info.dp_keep_prob)
        model.batch_size = model_info.batch_size
        model.seq_len = model_info.seq_len
        model.vocab_size = vocab_size

    if load_on_device:
        model = model.to(device)
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model
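load_model reads several attributes off model_info, whose class is not part of this listing. A hypothetical container satisfying that interface could look like this sketch:

from dataclasses import dataclass

@dataclass
class ModelInfo:
    # Hypothetical stand-in for the model_info argument above; it lists
    # only the attributes and the one method that load_model actually uses.
    model: str            # 'RNN', 'GRU', or 'TRANSFORMER'
    hidden_size: int
    seq_len: int
    batch_size: int
    num_layers: int
    dp_keep_prob: float
    params_path: str      # assumed location of the saved state_dict

    def get_params_path(self) -> str:
        return self.params_path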
Example #4
File: ptb-lm.py  Project: lebrice/IFT6135
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters (such as the number
        # of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    raise ValueError("Model type not recognized.")

model = model.to(device)

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss()
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs
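The excerpt stops before the decay is actually applied. One common way to realize this schedule (a sketch, not the file's code; num_epochs and the optimizer update below are assumptions) is:

for epoch in range(args.num_epochs):  # args.num_epochs assumed
    # Flat for the first m_flat_lr epochs, then geometric decay per epoch.
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = args.initial_lr * lr_decay
    for group in optimizer.param_groups:  # if an optimizer was created above
        group['lr'] = lr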
if model_types[m] == 'RNN':
    model = RNN(emb_size=embSize[m], hidden_size=hiddenSize[m],
                seq_len=seqLen[m], batch_size=batchSize[m],
                vocab_size=vocab_size, num_layers=numLayers[m],
                dp_keep_prob=dropOut[m])
elif model_types[m] == 'GRU':
    model = GRU(emb_size=embSize[m], hidden_size=hiddenSize[m],
                seq_len=seqLen[m], batch_size=batchSize[m],
                vocab_size=vocab_size, num_layers=numLayers[m],
                dp_keep_prob=dropOut[m])
else:
    model = TRANSFORMER(vocab_size=vocab_size, n_units=hiddenSize[m],
                        n_blocks=numLayers[m], dropout=1. - dropOut[m])
model.load_state_dict(torch.load(path[m], map_location=device))
model.batch_size = batchSize[m]
model.seq_len = seqLen[m]
model.vocab_size = vocab_size
model = model.to(device)

# MAIN LOOP
val_loss = run_epoch(model, valid_data, model_types[m])
total_loss[m, :] = val_loss
time = np.arange(1, seqLen[m] + 1)
print('Plotting graph...')
plt.figure()
plt.plot(time, val_loss.flatten(), label='Val. Loss')
plt.ylabel('Average loss')
plt.xlabel('time-step (t)')
plt.grid(True)
plt.title('Average loss at each time-step on validation for ' + model_types[m])
plt.savefig(model_types[m] + '_avg_loss.png')
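run_epoch here evidently returns one averaged loss per time-step rather than a scalar (the plot has seqLen[m] points). A sketch of that per-step reduction, assuming logits of shape (seq_len, batch_size, vocab_size) and targets of shape (seq_len, batch_size), might be:

import torch
import torch.nn as nn

def per_timestep_loss(logits, targets):
    # Returns a (seq_len,) tensor: batch-averaged cross-entropy at each step t.
    loss_fn = nn.CrossEntropyLoss()
    return torch.stack([loss_fn(logits[t], targets[t])
                        for t in range(logits.size(0))])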
RNN = RNN(emb_size=200,
          hidden_size=1500,
          seq_len=0,
          batch_size=20,
          num_layers=2,
          vocab_size=vocab_size,
          dp_keep_prob=0.35).to(device)
GRU = GRU(emb_size=200,
          hidden_size=1500,
          seq_len=0,
          batch_size=20,
          num_layers=2,
          vocab_size=vocab_size,
          dp_keep_prob=0.35).to(device)
for seq_len in seq_lens:
    print("Sequence length: ", seq_len)
    # RNN output
    # Load "best params" model
    RNN.seq_len = seq_len
    RNN.load_state_dict(
        torch.load(RNN_bestparams_path, map_location=device))
    RNN_generation = generation(RNN, train_data, valid_data, test_data,
                                word_to_id, id_2_word, seq_len, BatchSize)
    # print("RNN generated:")
    # print(RNN_generation)
    with open(os.path.join(OUTPUTPATH, 'RNN_%s_samples.txt' % (seq_len)),
              'w') as f:
        f.write("Model RNN. Sequence length: %s\n" % (seq_len))
        for index, sentence in enumerate(RNN_generation):
            f.write("Sentence %s: %s\n" % (index, sentence))
    # GRU output
    # Load "best params" model
    GRU.seq_len = seq_len
    GRU.load_state_dict(
                        dp_keep_prob=args["dp_keep_prob"])
        elif args["model"] == 'TRANSFORMER':
            if args["debug"]:  # use a very small model
                model = TRANSFORMER(vocab_size=vocab_size,
                                    n_units=16,
                                    n_blocks=2)
            else:
                # Note that we're using num_layers and hidden_size to mean slightly
                # different things here than in the RNNs.
                # The Transformer also has other hyperparameters (such as the
                # number of attention heads) which can change its behavior.
                model = TRANSFORMER(vocab_size=vocab_size,
                                    n_units=args["hidden_size"],
                                    n_blocks=args["num_layers"],
                                    dropout=1. - args["dp_keep_prob"])
            # these 3 attributes don't affect the Transformer's computations;
            # they are only used in run_epoch
            model.batch_size = args["batch_size"]
            model.seq_len = args["seq_len"]
            model.vocab_size = vocab_size
        else:
            raise ValueError("Model type not recognized.")

        model = model.to(device)

        best_valid_loss = train(model, args, train_data, valid_data)

        hp_optimizer.tell(hyperparameters, best_valid_loss)

        save_optimizer(hp_optimizer, hp_search_folder)
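The listing ends inside a hyperparameter-search loop. A sketch of the surrounding loop, inferred only from the tell and save_optimizer calls above (ask(), n_trials, and build_model are assumptions, not shown in the source):

for trial in range(n_trials):                      # n_trials: assumed search budget
    hyperparameters = hp_optimizer.ask()           # assumed counterpart to tell()
    args.update(hyperparameters)                   # args is the dict indexed above
    model = build_model(args, vocab_size, device)  # hypothetical: the branching shown above
    best_valid_loss = train(model, args, train_data, valid_data)
    hp_optimizer.tell(hyperparameters, best_valid_loss)
    save_optimizer(hp_optimizer, hp_search_folder)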