from collections import Counter
from random import shuffle
from time import time

import torch
import torch.nn as nn
import torch.optim as optim

import rnn_model_batch
import utils

# `device` and `bi` (the bidirectional-GRU flag) are module-level globals
# assumed to be set elsewhere in this file, e.g.:
#   device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#   bi = True


def train(samples, labels, embeddings, n_epochs=2, vocab_size=-1, lr=0.01,
          use_pretrained_wv=False, n_classes=2, pad_id=0, batch_size=1):
    # keep each sample's length so batches can be sorted for packing
    lens = [len(s) for s in samples]
    lens_samples_labels = list(zip(lens, samples, labels))
    model = rnn_model_batch.rnn_model(device, word_embedding=embeddings,
                                      n_classes=n_classes,
                                      vocab_size=vocab_size,
                                      use_pretrained_wv=use_pretrained_wv,
                                      bi=bi)
    model = model.cuda()
    print(f' created RNN (GRU) model: {model}')
    # criterion = nn.NLLLoss()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for e in range(1, n_epochs + 1):
        t0 = time()
        losses = []
        shuffle(lens_samples_labels)
        for batch_lens_samples_labels in utils.get_batch(
                lens_samples_labels, batch_size=batch_size):
            # sort the batch by length, longest first, as packed sequences
            # expect descending lengths
            batch_lens_samples_labels = sorted(batch_lens_samples_labels,
                                               reverse=True)
            batch_lens, sents, labels = zip(*batch_lens_samples_labels)
            batch_lens = list(batch_lens)
            # pad every sentence to the longest one in this batch
            max_sent_len = batch_lens[0]
            sents = [
                utils.pad_token_ids(s, pad_id, max_len=max_sent_len)
                for s in sents
            ]
            sents = torch.tensor(sents, device=device, dtype=torch.long)
            labels = torch.tensor(labels, device=device, dtype=torch.long)
            optimizer.zero_grad()
            hidden = model.initHidden(batch_size=len(labels), bi=bi)
            logprobs = model(sents, batch_lens, hidden)
            loss = criterion(logprobs, labels)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        epoch_loss = sum(losses) / len(losses)
        epoch_time = round(time() - t0, 2)
        print(f'epoch: {e}, time: {epoch_time}, loss: {epoch_loss}')
    return model
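# The training loops in this file lean on two small helpers from `utils` that
# are not shown here. The sketches below describe the behavior assumed at the
# call sites (utils.get_batch and utils.pad_token_ids); they are inferences,
# not the repo's actual implementations, hence the _sketch_ prefix.


def _sketch_get_batch(items, batch_size=1):
    # yield consecutive slices of `items`; the last batch may be smaller
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]


def _sketch_pad_token_ids(token_ids, pad_id, max_len):
    # right-pad with pad_id up to max_len; longer sequences are truncated
    return list(token_ids[:max_len]) + [pad_id] * (max_len - len(token_ids))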
def train(train_loader, embeddings, n_epochs=2, vocab_size=-1, lr=0.01,
          use_pretrained_wv=False, n_classes=2):
    # variant of train() that consumes batches from a prebuilt DataLoader
    model = rnn_model_batch.rnn_model(device, word_embedding=embeddings,
                                      n_classes=n_classes,
                                      vocab_size=vocab_size,
                                      use_pretrained_wv=use_pretrained_wv,
                                      bi=bi)
    model = model.cuda()
    print(f' created RNN (GRU) model: {model}')
    # criterion = nn.NLLLoss()
    criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()
    for e in range(1, n_epochs + 1):
        t0 = time()
        losses = []
        for batch_data, batch_labels in train_loader:
            sents = batch_data
            labels = batch_labels
            optimizer.zero_grad()
            hidden = model.initHidden(batch_size=len(labels), bi=bi)
            logprobs = model(sents, hidden, is_batch=True)
            loss = criterion(logprobs, labels)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        epoch_loss = sum(losses) / len(losses)
        epoch_time = round(time() - t0, 2)
        print(f'epoch: {e}, time: {epoch_time}, loss: {epoch_loss}')
    return model
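# The DataLoader variant above expects `train_loader` to yield
# (batch_data, batch_labels) pairs of LongTensors, already padded to a common
# length and already on `device`. A minimal sketch of how such a loader could
# be built with torch.utils.data (an assumption; this repo may construct it
# differently):

from torch.utils.data import DataLoader, TensorDataset


def _sketch_make_train_loader(samples, labels, pad_id, max_len, batch_size=32):
    # pad/truncate every sample to max_len, then wrap in a shuffling DataLoader
    padded = [list(s[:max_len]) + [pad_id] * (max_len - len(s))
              for s in samples]
    X = torch.tensor(padded, device=device, dtype=torch.long)
    y = torch.tensor(labels, device=device, dtype=torch.long)
    return DataLoader(TensorDataset(X, y), batch_size=batch_size, shuffle=True)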
def train(samples, labels, embeddings, n_epochs=2, vocab_size=-1, lr=0.01,
          use_pretrained_wv=False, pad_id=0, batch_size=1, test_dict=None,
          test_every=0, use_wt_loss=True, max_len=64):
    lens = [len(s) for s in samples]
    lens_samples_labels = list(zip(lens, samples, labels))
    n_classes = len(set(labels))
    model = rnn_model_batch.rnn_model(device, word_embedding=embeddings,
                                      n_classes=n_classes,
                                      vocab_size=vocab_size,
                                      use_pretrained_wv=use_pretrained_wv,
                                      bi=bi, n_layers=1, max_len=max_len)
    model = model.cuda()
    print(f' created RNN (GRU) model: {model}')
    if use_wt_loss:
        # weight each class inversely to its frequency so rare classes are
        # not drowned out by frequent ones
        label_freq_counts = Counter(labels)
        print('class freq dist: ', sorted(label_freq_counts.items()))
        # total_labels_count = len(labels)
        # label_probs = {label: count / total_labels_count
        #                for label, count in label_freq_counts.items()}
        # wts = [label_probs[label] for label in sorted(label_probs.keys())]
        wts = [
            1. / label_freq_counts[label]
            for label in sorted(label_freq_counts.keys())
        ]
        # wts = [-np.log10(w) for w in wts]
        wts = torch.tensor(wts, device=device, dtype=torch.float)
        print('weights for loss function: ', wts)
        criterion = nn.CrossEntropyLoss(weight=wts, reduction='sum')
    else:
        criterion = nn.CrossEntropyLoss()
    # optimizer = optim.SGD(model.parameters(), lr=lr)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # optimizer = optim.RMSprop(model.parameters(), lr=lr)
    for e in range(1, n_epochs + 1):
        model.train()
        t0 = time()
        losses = []
        shuffle(lens_samples_labels)
        for batch_lens_samples_labels in utils.get_batch(
                lens_samples_labels, batch_size=batch_size):
            # sort the batch by length, longest first, for packed sequences
            batch_lens_samples_labels = sorted(batch_lens_samples_labels,
                                               reverse=True)
            batch_lens, sents, labels = zip(*batch_lens_samples_labels)
            batch_lens = list(batch_lens)
            # pad every sentence to the fixed max_len expected by the model
            sents = [
                utils.pad_token_ids(s, pad_id, max_len=max_len)
                for s in sents
            ]
            sents = torch.tensor(sents, device=device, dtype=torch.long)
            labels = torch.tensor(labels, device=device, dtype=torch.long)
            optimizer.zero_grad()
            hidden = model.initHidden(batch_size=len(labels), bi=bi)
            logprobs = model(sents, batch_lens, hidden)
            loss = criterion(logprobs, labels)
            losses.append(loss.item())
            loss.backward()
            # zero the gradient of the padding token's embedding row so the
            # PAD vector is never updated, regardless of embedding size
            emb_grad = model.embedding.weight.grad
            emb_grad[pad_id] = torch.zeros_like(emb_grad[pad_id])
            optimizer.step()
        epoch_loss = sum(losses) / len(losses)
        epoch_time = round(time() - t0, 2)
        print(f'epoch: {e}, time: {epoch_time} sec., loss: {epoch_loss}')
        # periodic evaluation; skip entirely when test_every == 0 (the
        # default) to avoid a modulo-by-zero
        if test_dict and test_every and e % test_every == 0:
            X_test = test_dict['X_test']
            y_test = test_dict['y_test']
            test_pad_id = test_dict['pad_id']  # don't shadow the training pad_id
            a, f, p, r = test_batch(model, X_test, y_test, test_pad_id,
                                    batch_size=1, max_len=max_len)
            print('*' * 80)
            print(f'epoch: {e}, on TEST SET acc: {a}, f1: {f}, '
                  f'precision: {p}, recall: {r}')
            print('*' * 80)
    return model
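# Example invocation of the weighted-loss train() above. All data-preparation
# names here (train_ids, train_labels, test_ids, test_labels, vocab) are
# hypothetical placeholders, not defined in this file:
#
# model = train(train_ids, train_labels, embeddings,
#               n_epochs=10, vocab_size=len(vocab), lr=0.001,
#               use_pretrained_wv=True, pad_id=0, batch_size=32,
#               test_dict={'X_test': test_ids, 'y_test': test_labels,
#                          'pad_id': 0},
#               test_every=2, use_wt_loss=True, max_len=64)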