def load_checkpoint(path):
    """Rebuild a model from a saved checkpoint, with a fresh optimizer and loss.

    The checkpoint is deserialized onto the CPU and its ``'model'`` entry is
    loaded into a default-constructed ``LSTMModel``. The optimizer and the
    criterion are newly created here; no other checkpoint entry is read.

    Args:
        path: Checkpoint location, handed unchanged to ``torch.load``.

    Returns:
        A ``(model, optimizer, criterion)`` tuple: the restored model, an
        Adam optimizer (lr=0.0001) over its parameters, and an ``nn.CTCLoss``.
    """
    # Map every stored tensor to the CPU regardless of where it was saved.
    cpu = torch.device('cpu')
    saved = torch.load(path, map_location=cpu)

    net = LSTMModel()
    net.load_state_dict(saved['model'])

    return net, optim.Adam(net.parameters(), lr=0.0001), nn.CTCLoss()
def train_model(train_data, train_target, word_to_idx, target_to_idx,
                model_file="model.pth", model_type="LSTM",
                embedding_dim=32, hidden_dim=16, epochs=10,
                learning_rate=0.1, seed=19, log_interval=100):
    """Train a sequence model one example at a time with SGD and NLL loss.

    Each ``(sentence, next_word)`` pair from ``zip(train_data, train_target)``
    is converted with ``prepare_sequence``, passed through the model, and
    used for a single optimizer step.

    Args:
        train_data: Iterable of input sequences.
        train_target: Iterable of target sequences, paired with ``train_data``.
        word_to_idx: Mapping used to index the inputs; its length is passed
            to the model constructor.
        target_to_idx: Mapping used to index the targets; its length is
            passed to the model constructor.
        model_file: Not used by this function; kept for interface
            compatibility.
        model_type: Architecture name. Only ``"LSTM"`` is supported.
        embedding_dim: Forwarded to ``LSTMModel``.
        hidden_dim: Forwarded to ``LSTMModel``.
        epochs: Number of passes over the training pairs.
        learning_rate: SGD learning rate.
        seed: Seed given to ``torch.manual_seed``.
        log_interval: Report the mean loss every this many examples.
            A falsy value (``0``/``None``) disables the periodic report.

    Returns:
        The trained model.

    Raises:
        ValueError: If ``model_type`` is not a supported architecture.
    """
    # Fail fast with a clear message; previously an unknown model_type left
    # `model` unbound and crashed later with an UnboundLocalError.
    if model_type != "LSTM":
        raise ValueError(
            "unsupported model_type %r; only 'LSTM' is available" % (model_type,)
        )

    torch.manual_seed(seed)

    ## initialize model
    model = LSTMModel(embedding_dim, hidden_dim,
                      len(word_to_idx), len(target_to_idx))
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    st = time.time()
    print("training model ...")

    # reference: http://pytorch.org/tutorials/beginner/nlp/sequence_models_tutorial.html
    count = 0
    loss_sum = 0.0
    for epoch in range(epochs):
        for sentence, next_word in zip(train_data, train_target):
            # Step 1. PyTorch accumulates gradients, so clear them out
            # before each instance.
            model.zero_grad()

            # Also reset the hidden state so it carries no history from the
            # previous instance.
            model.hidden = model.init_hidden()

            # Step 2. Turn the raw sequences into index tensors.
            sentence_in = prepare_sequence(sentence, word_to_idx)
            targets = prepare_sequence(next_word, target_to_idx)

            # Step 3. Run the forward pass.
            scores = model(sentence_in)

            # Step 4. Compute the loss and gradients, then update the
            # parameters.
            loss = loss_function(scores, targets)
            loss.backward()
            optimizer.step()

            # `.item()` extracts the Python float from the scalar loss;
            # indexing it with `[0]` raises on PyTorch >= 0.4.
            loss_sum += loss.item()

            # Count the example before checking the interval so that every
            # reported mean covers exactly `log_interval` losses.
            count += 1
            if log_interval and count % log_interval == 0:
                print("%d sentence done. loss mean: %f"
                      % (count, loss_sum / log_interval))
                loss_sum = 0.0

        print("%d th epoch done. %f sec" % (epoch, time.time() - st))

    return model
from tqdm import tqdm dataset = MovieDataLoader() tr_dl, val_dl, test_dl = dataset.tr_dl, dataset.val_dl, dataset.test_dl vocab_size = len(dataset.TEXT.vocab) input_dim = 28 hidden_dim = 128 layer_dim = 1 # ONLY CHANGE IS HERE FROM ONE LAYER TO TWO LAYER output_dim = 2 model = LSTMModel(vocab_size, hidden_dim, layer_dim, output_dim) criterion = nn.BCELoss() learning_rate = 0.1 optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) def test(loss_fn, model, data_loader, device): if model.training: model.eval() total_loss = 0 correct_count = 0 for step, batch in tqdm(enumerate(data_loader), total=len(data_loader)): data, target = batch.text.to(device), batch.label.to(device) output = model(data).to(device) with torch.no_grad():