def train_model(train_x, train_y, train_m, train_t,
                test_x, test_y, test_m, test_t,
                batch_size, epochs, n_batches, unknowns):
    gen = ner_lib.generate(train_x, train_y, train_m, train_t, batch_size)
    model = models.BiLSTM(len(word2id), embed_size, hidden_size, len(tag2id),
                          vectors, train_embedding=train_embedding)
    model.apply(weights_init_uniform_rule)
    model.cuda()
    optimizer = t.optim.Adam(model.parameters(), lr=0.01)

    accs = []
    for epoch in range(epochs):
        total_loss = 0
        for batch in range(n_batches):
            x, y, m, T2S = next(gen)
            x = t.tensor(x, dtype=t.long).cuda()
            y = t.tensor(y, dtype=t.long).cuda()
            m = t.tensor(m).cuda()

            model.zero_grad()
            # x, scores, best_tag_sequence = model(x, m)
            # loss = model.loss_fn(x, m, y)
            z = model(x, m)
            z = z.view(-1, len(tag2id))
            y = y.view(-1)
            loss = model.loss_fn(z, y)
            loss.backward()
            optimizer.step()

            batch_loss = loss.detach().cpu().numpy()
            # print(batch_loss)
            total_loss += batch_loss

        accuracy = test2(model, test_x, test_y, test_m, test_t, test_batch_size, unknowns)
        # loss = total_loss / n_batches
        # print(accuracy[2])
        # print("Epoch: {0}, Loss: {1:.3}, Test: {2:.3}, {3:.3}, {4:.3}".format(
        #     epoch, loss, accuracy[0][0], accuracy[1][0], accuracy[2][0]))
        # accuracy.insert(0, [loss])
        print(accuracy)
        accs.append(accuracy)
    return accs
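# train_model applies a weights_init_uniform_rule helper that is not shown in this
# excerpt. A common sketch (an assumption here; the project's own rule may differ)
# draws each Linear layer's weights uniformly in +/- 1/sqrt(fan_in):
import numpy as np
import torch.nn as nn

def weights_init_uniform_rule(m):
    if isinstance(m, nn.Linear):
        bound = 1.0 / np.sqrt(m.in_features)   # scale the range by fan-in
        m.weight.data.uniform_(-bound, bound)
        if m.bias is not None:
            m.bias.data.fill_(0.0)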
train_data = TextDataset(f'data/{args.data}_train.jsonl', tokenizer, ['tokens', 'tags'])
test_data = TextDataset(f'data/{args.data}_test.jsonl', tokenizer, ['tokens', 'tags'])

train_iterator = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                            collate_fn=train_data.collate)
test_iterator = DataLoader(test_data, batch_size=args.batch_size, shuffle=False,
                           collate_fn=test_data.collate)

token_vocab_size = len(tokenizer.vocabs['tokens'].itos)
tag_vocab_size = len(tokenizer.vocabs['tags'].itos)

token_pad_token = tokenizer.vocabs['tokens'].pad_token
token_pad_idx = tokenizer.vocabs['tokens'].stoi[token_pad_token]
tag_pad_token = tokenizer.vocabs['tags'].pad_token
tag_pad_idx = tokenizer.vocabs['tags'].stoi[tag_pad_token]

model = models.BiLSTM(token_vocab_size, args.embedding_dim, args.hidden_dim,
                      args.n_layers, args.dropout, token_pad_idx)
head = models.TagHead(args.hidden_dim, tag_vocab_size)

if args.load is not None:
    model.load_state_dict(torch.load(args.load))

model = model.cuda()
head = head.cuda()

optimizer = optim.Adam(list(model.parameters()) + list(head.parameters()), lr=args.lr)
criterion = nn.CrossEntropyLoss(ignore_index=tag_pad_idx)
criterion = criterion.cuda()
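# A minimal sketch of how the pieces above fit together for one training epoch.
# Assumptions (not from the source): the collate function yields (tokens, tags)
# LongTensor pairs of shape [batch, seq_len], and model(tokens) returns per-token
# features of shape [batch, seq_len, hidden_dim] that head maps to tag logits.
def train_epoch(model, head, iterator, optimizer, criterion):
    model.train()
    head.train()
    epoch_loss = 0.0
    for tokens, tags in iterator:
        tokens, tags = tokens.cuda(), tags.cuda()
        optimizer.zero_grad()
        features = model(tokens)                   # [batch, seq_len, hidden_dim]
        logits = head(features)                    # [batch, seq_len, tag_vocab_size]
        # flatten so CrossEntropyLoss sees one prediction per token; padded
        # positions are skipped via the ignore_index=tag_pad_idx set above
        loss = criterion(logits.view(-1, logits.shape[-1]), tags.view(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)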
if 'lin' in model_type:
    hidden_sizes = []
    hidden_sizes.append(lstm_hidden_size)
    linear_hidden = [x for x in linear_hidden_sizes if x < lstm_hidden_size]
    linear_hidden_ind = np.random.randint(0, len(linear_hidden))
    # sample from the filtered list so the linear layer stays smaller than the LSTM
    linear_hidden_size = linear_hidden[linear_hidden_ind]
    hidden_sizes.append(linear_hidden_size)

# Initialize model
model = None
if model_type == 'bilstm':
    model = models.BiLSTM(input_size, lstm_hidden_size, layers, num_classes,
                          device, rnn_dropout, other_dropout).to(device)
elif model_type == 'bigru':
    model = models.BiGRU(input_size, lstm_hidden_size, layers, num_classes,
                         device, rnn_dropout, other_dropout).to(device)
elif model_type == 'bilstm-lin':
    model = models.BiLSTMLin(input_size, hidden_sizes, layers, num_classes,
                             device, rnn_dropout, other_dropout).to(device)
elif model_type == 'bigru-lin':
    model = models.BiGRULin(input_size, hidden_sizes, layers, num_classes,
                            device, rnn_dropout, other_dropout).to(device)
elif model_type == 'bilstm-attn':
    model = models.BiLSTMAttn(input_size, lstm_hidden_size, layers, num_classes,
                              device, rnn_dropout, other_dropout).to(device)
elif model_type == 'bigru-attn':
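    # The excerpt cuts off inside the 'bigru-attn' branch; this completion is a hedged
    # sketch mirroring the other branches (models.BiGRUAttn and its constructor
    # signature are assumed, not confirmed by the source).
    model = models.BiGRUAttn(input_size, lstm_hidden_size, layers, num_classes,
                             device, rnn_dropout, other_dropout).to(device)

# Optional sanity check (also a sketch, not from the source): fail fast if
# model_type matched none of the branches so a None model never reaches training.
if model is None:
    raise ValueError("unknown model_type: {}".format(model_type))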
def run_rnn_exp(data, embedding_matrix, token_to_idx, seed=0, weight_decay=0.0,
                lr=0.001, max_len=51, batch_size=128,
                idx_to_label=['negative', 'neutral', 'positive'],
                embedding_freeze=True, embedding_normalize=True, obj='loss',
                measures=['loss', 'macro_f1', 'acc', 'avgrecall'],
                epoches=50, silent=False, cuda=-1):
    # set seeds for reproducibility
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Load data into numpy format
    train_list_sentences, train_list_labels = data_helper.map_to_num_rnn(
        data['train'][0], data['train'][1], token_to_idx,
        idx_to_label=idx_to_label, max_len=max_len)
    dev_list_sentences, dev_list_labels = data_helper.map_to_num_rnn(
        data['dev'][0], data['dev'][1], token_to_idx,
        idx_to_label=idx_to_label, max_len=max_len)
    test_list_sentences, test_list_labels = data_helper.map_to_num_rnn(
        data['test'][0], data['test'][1], token_to_idx,
        idx_to_label=idx_to_label, max_len=max_len)

    # create a weighted sampler to counter the class imbalance of the training data
    train_num_count = [0] * len(idx_to_label)
    for label in train_list_labels:
        train_num_count[label] += 1
    if not silent:
        print(train_num_count)
    sample_weights = [0.0] * len(train_list_labels)
    for i, label in enumerate(train_list_labels):
        sample_weights[i] = len(train_list_labels) / train_num_count[label]
    sampler = torch.utils.data.sampler.WeightedRandomSampler(
        sample_weights, len(sample_weights))

    # create iterators for train, dev and test
    train_iter = DataLoader(data_helper.BasicDataset(train_list_sentences, train_list_labels),
                            batch_size=batch_size, sampler=sampler,
                            collate_fn=data_helper.rnn_collate_fn_cuda)
    dev_iter = DataLoader(data_helper.BasicDataset(dev_list_sentences, dev_list_labels),
                          batch_size=batch_size,
                          collate_fn=data_helper.rnn_collate_fn_cuda)
    test_iter = DataLoader(data_helper.BasicDataset(test_list_sentences, test_list_labels),
                           batch_size=batch_size,
                           collate_fn=data_helper.rnn_collate_fn_cuda)

    # output layer size follows idx_to_label; embedding options come from the arguments
    model = models.BiLSTM(embedding_matrix, hidden_size=150, num_layer=2,
                          embedding_freeze=embedding_freeze,
                          embedding_normalize=embedding_normalize,
                          max_norm=5.0, num_classes=len(idx_to_label))
    if cuda != -1:
        model.cuda(cuda)

    # start training
    criterion = torch.nn.CrossEntropyLoss(size_average=False)
    optimizer = torch.optim.Adam(model.custom_params, lr=lr, weight_decay=weight_decay)

    obj_value = 0.0
    final_metrics = {'loss': 0.0, 'macro_f1': 0.0, 'acc': 0.0, 'avgrecall': 0.0}

    for epoch in range(epoches):
        start_time = time.time()

        model.train()
        train_sum_loss = 0.0
        train_count = 0
        train_predict = []
        train_gold = []
        for batch in train_iter:
            model.hidden1 = model.init_hidden(
                batch_size=int(batch['labels'].data.size()[0]))
            model.hidden2 = model.init_hidden(
                batch_size=int(batch['labels'].data.size()[0]))
            optimizer.zero_grad()
            outputs = model(batch['sentence'])
            _, outputs_label = torch.max(outputs, 1)
            for label in outputs_label.data:
                train_predict.append(int(label))
            for label in batch['labels'].data:
                train_gold.append(int(label))
            loss = criterion(outputs, batch['labels'])
            loss.backward()
            optimizer.step()
            train_sum_loss += loss.data[0]
            train_count += batch['labels'].shape[0]
        train_metrics_result = metrics.evaluation_metrics(
            train_gold, train_predict, measures=measures, idx_to_label=idx_to_label)
        train_metrics_result['loss'] = train_sum_loss / train_count
        if not silent:
            output_str = "[{}/{}]\ntrain\t".format(epoch + 1, epoches)
            for key in measures:
                output_str += "{}={:.4f}\t".format(key, train_metrics_result[key])
            print(output_str)

        model.eval()
        dev_sum_loss = 0.0
        dev_count = 0
        dev_predict = []
        dev_gold = []
        for batch in dev_iter:
            model.hidden1 = model.init_hidden(
                batch_size=int(batch['labels'].data.size()[0]))
            model.hidden2 = model.init_hidden(
                batch_size=int(batch['labels'].data.size()[0]))
            optimizer.zero_grad()
            outputs = model(batch['sentence'])
            _, outputs_label = torch.max(outputs, 1)
            for label in outputs_label.data:
                dev_predict.append(int(label))
            for label in batch['labels'].data:
                dev_gold.append(int(label))
            loss = criterion(outputs, batch['labels'])
            dev_sum_loss += loss.data[0]
            dev_count += batch['labels'].shape[0]
        dev_metrics_result = metrics.evaluation_metrics(
            dev_gold, dev_predict, measures=measures, idx_to_label=idx_to_label)
        dev_metrics_result['loss'] = dev_sum_loss / dev_count
        if not silent:
            output_str = "dev\t"
            for key in measures:
                output_str += "{}={:.4f}\t".format(key, dev_metrics_result[key])
            print(output_str)

        test_sum_loss = 0.0
        test_count = 0
        test_predict = []
        test_gold = []
        for batch in test_iter:
            model.hidden1 = model.init_hidden(
                batch_size=int(batch['labels'].data.size()[0]))
            model.hidden2 = model.init_hidden(
                batch_size=int(batch['labels'].data.size()[0]))
            optimizer.zero_grad()
            outputs = model(batch['sentence'])
            _, outputs_label = torch.max(outputs, 1)
            for label in outputs_label.data:
                test_predict.append(int(label))
            for label in batch['labels'].data:
                test_gold.append(int(label))
            loss = criterion(outputs, batch['labels'])
            test_sum_loss += loss.data[0]
            test_count += batch['labels'].shape[0]
        test_metrics_result = metrics.evaluation_metrics(
            test_gold, test_predict, measures=measures, idx_to_label=idx_to_label)
        test_metrics_result['loss'] = test_sum_loss / test_count
        if not silent:
            output_str = "test\t"
            for key in measures:
                output_str += "{}={:.4f}\t".format(key, round(test_metrics_result[key], 5))
            print(output_str)

        # report epoch time
        if not silent:
            print("cost time:{}".format(time.time() - start_time))

        # model selection on the dev objective: keep the test metrics from the epoch
        # with the best dev value (lower is better for loss, higher otherwise)
        if epoch == 0:
            obj_value = dev_metrics_result[obj]
            final_metrics = test_metrics_result
        else:
            if obj != 'loss':
                if dev_metrics_result[obj] > obj_value:
                    obj_value = dev_metrics_result[obj]
                    final_metrics = test_metrics_result
            else:
                if dev_metrics_result[obj] < obj_value:
                    obj_value = dev_metrics_result[obj]
                    final_metrics = test_metrics_result

    return obj_value, final_metrics, model.embed.weight.data.cpu().numpy()
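# Hypothetical call site for run_rnn_exp (the variable names below are illustrative;
# the real splits and embedding loader live elsewhere in this project). data maps each
# split to a (sentences, labels) pair, and the fine-tuned embedding matrix comes back
# as the third return value.
obj_value, final_metrics, tuned_embeddings = run_rnn_exp(
    data={'train': (train_sents, train_labels),
          'dev': (dev_sents, dev_labels),
          'test': (test_sents, test_labels)},
    embedding_matrix=embedding_matrix,
    token_to_idx=token_to_idx,
    seed=0, lr=0.001, batch_size=128,
    obj='macro_f1', epoches=50, cuda=0)
print(final_metrics)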