Example #1
def make_lstm_model(device, my_words, train_data, valid_data, test_data):
    """
    Makes the LSTM model then calls a function to save it to disk
    :param device: Stores whether to use cuda (GPU)
    :return: None
    """

    vocab_size = len(my_words.word_to_id)
    if args.bi_lstm:
        print("Making Bidirectional LSTM Model")
        models = {'forward': None}
    else:
        print("Making Forward-Backward LSTM Model")
        models = {'forward': None, 'backward': None}

    for direction in models:
        models[direction] = LM_LSTM(embedding_dim=args.embedding_size, num_steps=args.num_steps,
                                    batch_size=args.batch_size, hidden_dim=args.hidden_size,
                                    vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.dp_keep_prob,
                                    bidirectional=args.bi_lstm)
        models[direction].direction = direction
        models[direction].to(device)  # Move model to GPU if cuda is utilized
    lr = args.inital_lr
    lr_decay_base = 1 / 1.15  # decay factor for learning rate
    m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs

    print("########## Training ##########################")
    for epoch in range(args.num_epochs):
        lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
        lr = lr * lr_decay  # decay lr if it is time
        train_p = dict()
        for model in models:
            train_p[model] = run_epoch(models[model], train_data, True, lr, device)
        if "backward" in models:
            print('Train perplexity at epoch {}: forward: {:8.2f}, backward: {:8.2f}'
                  .format(epoch, train_p["forward"], train_p["backward"]))
            # print('Validation perplexity at epoch {}: forward: {:8.2f}, backward: {:8.2f}'
            #       .format(epoch, run_epoch(model['forward'], valid_data, device=device),
            #               run_epoch(model['backward'], valid_data, device=device)))
        else:
            print('Train perplexity at epoch {}: forward: {:8.2f}'
                  .format(epoch, train_p["forward"]))
            # print('Validation perplexity at epoch {}: forward: {:8.2f}'
            #       .format(epoch, run_epoch(model['forward'], valid_data, device=device)))

    save_data(models=models)  # Save the results

    print("########## Testing ##########################")
    for direction in models:
        models[direction].batch_size = 1  # to make sure we process all the data
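
The epoch loop above holds the learning rate flat for the first m_flat_lr epochs and then shrinks it by lr_decay_base each epoch; because lr is multiplied in place, the decay compounds once it kicks in. A minimal standalone sketch of that schedule (the helper name decayed_lrs and the initial rate of 20 are illustrative assumptions, not from the original):

def decayed_lrs(initial_lr=20.0, num_epochs=20, lr_decay_base=1 / 1.15, m_flat_lr=14.0):
    # Reproduces the lr update from the training loops above, one value per epoch.
    lr = initial_lr
    schedule = []
    for epoch in range(num_epochs):
        lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)  # 1.0 while epoch <= m_flat_lr
        lr = lr * lr_decay  # same in-place update as above, so the decay compounds
        schedule.append(lr)
    return schedule

print(decayed_lrs())  # flat at 20.0 through epoch 14, then shrinking faster each epoch
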
Example #2
                p.data.add_(-lr, p.grad.data)
            if step % (epoch_size // 10) == 10:
                print("{} perplexity: {:8.2f} speed: {} wps".format(
                    step * 1.0 / epoch_size, np.exp(costs / iters),
                    iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)
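
# Note on the perplexity above: np.exp(costs / iters) is exp(average per-token cross-entropy).
# The fragment does not show how costs and iters are accumulated; a typical (assumed) pattern
# inside the step loop would be:
#     costs += loss.item() * model.num_steps   # cross-entropy summed over the predicted tokens
#     iters += model.num_steps                 # tokens predicted so far this epoch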


if __name__ == "__main__":
    raw_data = reader.ptb_raw_data(data_path=args.data)
    train_data, valid_data, test_data, word_to_id, id_2_word = raw_data
    vocab_size = len(word_to_id)
    print('Vocabulary size: {}'.format(vocab_size))
    model = LM_LSTM(embedding_dim=args.hidden_size,
                    num_steps=args.num_steps,
                    batch_size=args.batch_size,
                    vocab_size=vocab_size,
                    num_layers=args.num_layers,
                    dp_keep_prob=args.dp_keep_prob)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model.to(device)
    lr = args.inital_lr
    # decay factor for learning rate
    lr_decay_base = 1 / 1.15
    # we will not touch lr for the first m_flat_lr epochs
    m_flat_lr = 14.0

    print("########## Training ##########################")
    for epoch in range(args.num_epochs):
        lr_decay = lr_decay_base**max(epoch - m_flat_lr, 0)
        lr = lr * lr_decay  # decay lr if it is time
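
The fragment above updates the weights by hand with p.data.add_(-lr, p.grad.data) instead of stepping an optimizer. A minimal self-contained sketch of that training step, written against the current in-place API (the toy linear model, random batch, and lr value are placeholders, not the original LM_LSTM or PTB data):

import torch

# Sketch of the hand-rolled SGD step used in the fragments above; the toy model and the
# random batch are placeholders, not the original LM_LSTM or PTB data.
model = torch.nn.Linear(10, 2)
criterion = torch.nn.CrossEntropyLoss()
lr = 1.0  # illustrative value

inputs = torch.randn(4, 10)
targets = torch.randint(0, 2, (4,))

model.zero_grad()
loss = criterion(model(inputs), targets)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)  # same 0.25 threshold as in the fragments
with torch.no_grad():
    for p in model.parameters():
        p.add_(p.grad, alpha=-lr)  # equivalent of the older p.data.add_(-lr, p.grad.data)
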
Example #3
if args.eval:
    model = torch.load(args.checkpoint)
    model.batch_size = args.batch_size
    optimizer = torch.optim.SGD(model.parameters(), lr=args.initial_lr)
    print('Test Perplexity: {:8.2f}'.format(
        run_epoch(model, test_data, optimizer)))
    sys.exit()
lr = args.initial_lr
# decay factor for learning rate
lr_decay_base = 1 / 1.15
# we will not touch lr for the first m_flat_lr epochs
m_flat_lr = 14.0
model = LM_LSTM(embedding_dim=args.embedding_size,
                rnn_type=args.rnn_type,
                hidden_size=args.hidden_size,
                num_steps=args.num_steps,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
model.cuda()
print(model)
print("########## Training ##########################")
if args.optimizer == "sgd":
    optimizer = torch.optim.SGD(model.parameters(), lr=args.initial_lr)
else:
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)
notimprove = 0
best_val = run_epoch(model, valid_data, optimizer)
for epoch in range(args.num_epochs):
    is_best = False
    if args.lr_schedule == "default":
    loss.backward()
    torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)

    for p in model.parameters():
        p.data.add_(-lr, p.grad.data)
    if step % 30 == 0:
        print("{} perplexity: {:8.2f} speed: {} wps".format(
            step * 1.0 / epoch_size, np.exp(costs / iters),
            iters * model.batch_size / (time.time() - start_time)))

    return np.exp(costs / iters)


model = LM_LSTM(embedding_dim=HIDDEN_DIM,
                num_steps=num_steps,
                batch_size=batch_size,
                vocab_size=18280,
                num_layers=EMBEDDING_DIM,
                dp_keep_prob=0.9)
model.cuda()
lr = 20
# decay factor for learning rate
lr_decay_base = 1 / 1.15
# we will not touch lr for the first m_flat_lr epochs
m_flat_lr = 14.0

for epoch in range(300):
    prt = (epoch % 30 == 0)
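
The third example keeps best_val, notimprove, and is_best so that the --eval branch can later restore a saved model with torch.load(args.checkpoint). A minimal sketch of the bookkeeping that pairs with that eval path; run_epoch's training signature and the patience of 5 epochs are assumptions, and model, optimizer, train_data, valid_data, and args are assumed to exist as in the example:

# Sketch of best-checkpoint tracking to match the torch.load(args.checkpoint) eval branch above.
best_val = run_epoch(model, valid_data, optimizer)
notimprove = 0
for epoch in range(args.num_epochs):
    run_epoch(model, train_data, optimizer, is_train=True)  # training-mode flag is assumed
    val_ppl = run_epoch(model, valid_data, optimizer)
    is_best = val_ppl < best_val
    if is_best:
        best_val = val_ppl
        notimprove = 0
        torch.save(model, args.checkpoint)  # what torch.load() restores in the eval branch
    else:
        notimprove += 1
    if notimprove >= 5:  # stop after 5 epochs with no validation improvement (assumed patience)
        break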