示例#1
0
def load_checkpoint(path):
    """Rebuild a model from a saved checkpoint, with a fresh optimizer and loss.

    The checkpoint at ``path`` is deserialized onto the CPU and its
    ``'model'`` entry is loaded into a newly constructed ``LSTMModel``.
    The optimizer is created from scratch (Adam, lr=0.0001); no optimizer
    state is read from the checkpoint here.

    Args:
        path: Location of the checkpoint, passed straight to ``torch.load``.

    Returns:
        A ``(model, optimizer, criterion)`` tuple, where ``criterion`` is an
        ``nn.CTCLoss`` instance.
    """
    # Map every stored tensor to the CPU regardless of where it was saved.
    cpu = torch.device('cpu')
    saved = torch.load(path, map_location=cpu)

    net = LSTMModel()
    net.load_state_dict(saved['model'])

    ctc_loss = nn.CTCLoss()
    adam = optim.Adam(net.parameters(), lr=0.0001)
    return net, adam, ctc_loss
示例#2
0
def train_model(train_data, train_target, word_to_idx, target_to_idx, model_file="model.pth",
                model_type="LSTM", embedding_dim=32, hidden_dim=16, epochs=10,
                learning_rate=0.1, seed=19):
    """Train a model one sentence at a time with SGD and negative log-likelihood loss.

    Args:
        train_data: Iterable of sentences. Each one is converted with
            ``prepare_sequence(sentence, word_to_idx)`` before the forward pass.
        train_target: Iterable of targets, consumed in lockstep with
            ``train_data`` and converted with
            ``prepare_sequence(next_word, target_to_idx)``.
        word_to_idx: Mapping used to encode the inputs; its length is passed
            to the model constructor.
        target_to_idx: Mapping used to encode the targets; its length is
            passed to the model constructor.
        model_file: Not used by this function. Kept so existing callers that
            pass it keep working.
        model_type: Which model to build. Only ``"LSTM"`` is supported.
        embedding_dim: Forwarded to the ``LSTMModel`` constructor.
        hidden_dim: Forwarded to the ``LSTMModel`` constructor.
        epochs: Number of full passes over the training pairs.
        learning_rate: Learning rate for the SGD optimizer.
        seed: Seed given to ``torch.manual_seed`` before the model is built.

    Returns:
        The trained model.

    Raises:
        ValueError: If ``model_type`` is not a supported model name.
    """
    # Fail fast with a clear message; previously an unknown model_type left
    # `model` unbound and crashed later with an UnboundLocalError.
    if model_type != "LSTM":
        raise ValueError("unsupported model_type: %r" % (model_type,))

    torch.manual_seed(seed)

    ## initialize model
    model = LSTMModel(embedding_dim, hidden_dim, len(word_to_idx), len(target_to_idx))

    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    st = time.time()

    print("training model ...")

    # reference: http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html
    report_every = 100  # number of sentences per running-loss report
    count = 0
    loss_mean = 0.0
    for epoch in range(epochs):
        for sentence, next_word in zip(train_data, train_target):
            # Step 1. PyTorch accumulates gradients, so clear them before
            # each training instance.
            model.zero_grad()

            # Also reset the model's hidden state so it does not carry
            # history over from the previous instance.
            model.hidden = model.init_hidden()

            # Step 2. Encode the raw sentence and target as index sequences.
            sentence_in = prepare_sequence(sentence, word_to_idx)
            targets = prepare_sequence(next_word, target_to_idx)

            # Step 3. Forward pass.
            scores = model(sentence_in)

            # Step 4. Compute the loss, back-propagate, and update the
            # parameters.
            loss = loss_function(scores, targets)
            loss.backward()
            optimizer.step()

            # `.item()` extracts the Python scalar; indexing a 0-dim loss
            # tensor with `[0]` raises on current PyTorch releases.
            loss_mean += loss.item()

            # Count the sentence before checking the window so that every
            # report averages exactly `report_every` losses.
            count += 1
            if count % report_every == 0:
                print("%d sentence done. loss mean: %f" % (count, loss_mean / report_every))
                loss_mean = 0.0

        print("%d th epoch done. %f sec" % (epoch, time.time() - st))

    return model
示例#3
0
from tqdm import tqdm

# Data: build the loader once and pull out the three splits it exposes.
dataset = MovieDataLoader()
tr_dl = dataset.tr_dl
val_dl = dataset.val_dl
test_dl = dataset.test_dl
vocab_size = len(dataset.TEXT.vocab)

# Hyperparameters.
input_dim = 28  # not referenced by the setup statements in this section
hidden_dim = 128
# NOTE(review): an earlier comment here described a change "from one layer to
# two layer", but the value is 1 -- confirm the intended depth.
layer_dim = 1
output_dim = 2
learning_rate = 0.1

# Loss, model, and optimizer. The model is sized by the vocabulary, not by
# input_dim.
criterion = nn.BCELoss()
model = LSTMModel(vocab_size, hidden_dim, layer_dim, output_dim)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


def test(loss_fn, model, data_loader, device):
    if model.training:
        model.eval()

    total_loss = 0
    correct_count = 0

    for step, batch in tqdm(enumerate(data_loader), total=len(data_loader)):

        data, target = batch.text.to(device), batch.label.to(device)
        output = model(data).to(device)

        with torch.no_grad():