import numpy as np
import torch
from torch import nn
import torch.nn.functional as F

import torch_utils  # project-level helper module providing get_optimizer

# Trainer, unpack_batch, and LSTMClassifier are assumed to be defined
# elsewhere in this project.


class LSTMTrainer(Trainer):
    def __init__(self, opt, emb_matrix=None):
        self.opt = opt
        self.emb_matrix = emb_matrix
        self.model = LSTMClassifier(opt, emb_matrix=emb_matrix)
        self.criterion = nn.CrossEntropyLoss()
        # only optimize parameters that require gradients (e.g. frozen embeddings stay fixed)
        self.parameters = [p for p in self.model.parameters() if p.requires_grad]
        if opt['cuda']:
            self.model.cuda()
            self.criterion.cuda()
        self.optimizer = torch_utils.get_optimizer(opt['optim'], self.parameters, opt['lr'])

    def update(self, batch):
        """Run a single training step on one batch and return the loss value."""
        inputs, labels = unpack_batch(batch)

        # Step 1: init and forward
        self.model.train()
        self.optimizer.zero_grad()
        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        loss_val = loss.item()

        # Step 2: backward, with gradient clipping to stabilize LSTM training
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.opt['max_grad_norm'])

        # Step 3: update
        self.optimizer.step()
        return loss_val

    def predict(self, batch, unsort=True):
        """Evaluate on one batch; return predictions, probabilities, gold labels, and loss."""
        inputs, labels = unpack_batch(batch)
        self.model.eval()
        logits = self.model(inputs)
        loss = self.criterion(logits, labels)
        loss_val = loss.item()
        probs = F.softmax(logits, dim=1).data.cpu().numpy().tolist()
        predictions = np.argmax(logits.data.cpu().numpy(), axis=1).tolist()
        labels = labels.data.cpu().numpy().tolist()
        return predictions, probs, labels, loss_val
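A minimal sketch of how this trainer might be driven. Here `opt`, `emb_matrix`, `train_batches`, and `dev_batches` (and the `opt['num_epoch']` key) are placeholders, not part of the original code; the batches are assumed to be in whatever format `unpack_batch` expects.

# Hypothetical driver loop for LSTMTrainer; opt, emb_matrix,
# train_batches, and dev_batches are assumed to exist.
trainer = LSTMTrainer(opt, emb_matrix=emb_matrix)
for epoch in range(opt['num_epoch']):
    train_loss = 0.0
    for batch in train_batches:
        train_loss += trainer.update(batch)   # one optimizer step per batch
    dev_loss = 0.0
    for batch in dev_batches:
        _, _, _, loss_val = trainer.predict(batch)
        dev_loss += loss_val
    print("epoch %d: train_loss=%.4f dev_loss=%.4f" % (epoch + 1, train_loss, dev_loss))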
def train(dim, args):
    import os
    from os.path import join
    import torch
    from torch import nn, optim
    import numpy as np
    from features import ExtractWordEmbeddings
    from preprocess_data import batchify, padBatch
    from models.lstm import LSTMClassifier
    from sklearn.utils import shuffle
    # loadDatasetForLSTM is assumed to be imported elsewhere in this module.

    # hyperparameters
    embedding_dim = 300  # changes only with different word embeddings
    hidden_dim = args.hidden_dim
    max_epochs = args.max_epochs
    is_cuda = True
    batch_size = 60
    lr = args.lr
    n_decreases = 10
    save_dir = 'weights/LSTM/%s' % dim
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    """
    Loading the train / validation datasets
    X_tr: a list of tokenized sentences
    y_tr: a list of 0/1 labels
    """
    X_tr, y_tr = loadDatasetForLSTM(dim, 'train')  # a list of tokenized sentences
    X_d, y_d = loadDatasetForLSTM(dim, 'dev')

    # load model and settings for training
    model = LSTMClassifier(embedding_dim=embedding_dim, hidden_dim=hidden_dim)
    if is_cuda:
        model.cuda()
    optimizer = optim.AdamW(model.parameters(), lr=lr)
    flag = True
    old_val = np.inf  # previous (best) validation loss
    em = ExtractWordEmbeddings(emb_type='glove')
    loss_fn = nn.BCELoss()

    # train model
    epoch = 0
    cnt_decrease = 0
    while flag:
        tr_loss = 0.0
        epoch += 1
        if (epoch > max_epochs) or (cnt_decrease > n_decreases):
            break

        # train: shuffle X_tr and y_tr each epoch, then split them into batches
        model.train()
        X_tr, y_tr = shuffle(X_tr, y_tr)
        tr_batches = batchify(X_tr, y_tr, batch_size)
        for X_b, y_b in tr_batches:
            # X_b is still a list of tokenized sentences (a list of lists of words)
            optimizer.zero_grad()
            """
            obtain_vectors_from_sentence(sent=list of words, include_unk=True):
                maps each word to its embedding and returns a list of embeddings
            padBatch(list of embedding lists, max_seq=None):
                zero-pads each sequence in the batch to a common length
                and returns one fixed-size array
            """
            inputs = torch.tensor(
                padBatch([em.obtain_vectors_from_sentence(sent, True) for sent in X_b])
            ).float()  # inputs is now a tensor of shape (B, seq_len, dim)
            targets = torch.tensor(y_b, dtype=torch.float32)
            if is_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = model(inputs)
            # The original code raised an error here: nn.BCELoss requires outputs
            # and targets of the same shape. Assuming the model returns sigmoid
            # probabilities of shape (B, 1), flatten them to (B,) to match targets
            # (a no-op if the model already returns shape (B,)).
            loss = loss_fn(outputs.view(-1), targets)
            loss.backward()
            tr_loss += loss.item()
            optimizer.step()
        print("[Epoch %d] train loss: %1.3f" % (epoch, tr_loss))

        # validate
        model.eval()
        current_loss = 0.0
        X_d, y_d = shuffle(X_d, y_d)
        val_batches = batchify(X_d, y_d, batch_size)
        with torch.no_grad():
            for X_b, y_b in val_batches:
                inputs = torch.tensor(
                    padBatch([em.obtain_vectors_from_sentence(sent, True) for sent in X_b])
                ).float()
                targets = torch.tensor(y_b, dtype=torch.float32)
                if is_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = model(inputs)
                loss = loss_fn(outputs.view(-1), targets)  # same shape fix as above
                current_loss += loss.item()
        print("[Epoch %d] validation loss: %1.3f" % (epoch, current_loss))

        if current_loss < old_val:
            # the current epoch improved on the best validation loss: checkpoint it
            best_state = model.state_dict()
            torch.save(best_state, join(save_dir, 'best-weights.pth'))
            print("Updated model")
            old_val = current_loss
            cnt_decrease = 0
        else:
            # the current epoch did worse; stop after n_decreases misses in a row
            cnt_decrease += 1
            if cnt_decrease >= n_decreases:
                flag = False
    return
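For reference, a minimal sketch of the zero-padding behavior the docstring describes for `padBatch`; the actual implementation in preprocess_data may differ, and `pad_batch_sketch` is a hypothetical name.

import numpy as np

def pad_batch_sketch(batch, max_seq=None):
    # batch: a list of sequences, each a list of (dim,) embedding vectors.
    # Pads every sequence with zero vectors up to the batch maximum
    # (or max_seq) and stacks them into one (B, max_len, dim) array.
    max_len = max_seq or max(len(seq) for seq in batch)
    dim = len(batch[0][0])
    out = np.zeros((len(batch), max_len, dim), dtype=np.float32)
    for i, seq in enumerate(batch):
        out[i, :len(seq)] = seq[:max_len]
    return out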