import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


def train(lr, train_loader, test_loader):
    model = TextRNN().cuda()
    loss_fn = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    best_acc = 0
    for epoch in range(train_epochs):
        for step, (x_batch, y_batch) in enumerate(train_loader):
            x, y = x_batch.cuda(), y_batch.cuda()
            # Forward pass
            y_pred = model(x)
            loss = loss_fn(y_pred, y)
            # Backward pass and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Accuracy on the last training batch of the epoch
        acc = np.mean(
            (torch.argmax(y_pred, 1) == torch.argmax(y, 1)).cpu().numpy())
        print('Training epoch {:}, loss = {:}, acc = {:}'.format(
            epoch + 1, loss.item(), acc))
        # Evaluate on the test set every 5 epochs
        if (epoch + 1) % 5 == 0:
            model.eval()
            with torch.no_grad():
                for step, (x_batch, y_batch) in enumerate(test_loader):
                    x, y = x_batch.cuda(), y_batch.cuda()
                    # Forward pass only
                    y_pred = model(x)
                    acc = np.mean(
                        (torch.argmax(y_pred, 1) == torch.argmax(y, 1)).cpu().numpy())
                    # print('Test acc = {:}'.format(acc))
                    if acc > best_acc:
                        best_acc = acc
                        torch.save(model.state_dict(), 'model_params.pkl')
            model.train()
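
# Usage sketch (not from the original code): everything below is assumed
# for illustration -- the synthetic data, vocabulary size, sequence length,
# class count, and the global train_epochs that train() reads. It mainly
# shows that the labels are one-hot vectors, which is what both
# MultiLabelSoftMarginLoss and torch.argmax(y, 1) above expect.
from torch.utils.data import DataLoader, TensorDataset

train_epochs = 10
vocab_size, max_len, num_classes = 5000, 100, 10

x = torch.randint(0, vocab_size, (1024, max_len))                   # token-id sequences
y = torch.eye(num_classes)[torch.randint(0, num_classes, (1024,))]  # one-hot labels

train_loader = DataLoader(TensorDataset(x[:896], y[:896]), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(x[896:], y[896:]), batch_size=64)

train(lr=1e-3, train_loader=train_loader, test_loader=test_loader)
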
import time

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm import trange


def train():
    # Load the configuration file
    cf = Config('./config.yaml')
    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data, batch_size=cf.batch_size, shuffle=True)
    # Test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data, batch_size=cf.batch_size, shuffle=True)
    # Pre-trained word-embedding matrix
    embedding_matrix = get_pre_embedding_matrix("./data/final_vectors")
    # Model
    model = TextRNN(cf, torch.tensor(embedding_matrix))
    # Adam optimizer, updating only the trainable parameters
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()))
    # Move the model to the target device
    model.to(device)
    # Run the model in parallel across GPUs if more than one is present
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Training loop
    start_time = time.time()
    total_batch = 0             # total batches processed
    best_acc_val = 0.0          # best validation accuracy so far
    last_improved = 0           # batch at which the last improvement occurred
    require_improvement = 1000  # stop early after 1000 batches without improvement
    flag = False
    model.train()
    for epoch_id in trange(cf.epoch, desc="Epoch"):
        for step, batch in enumerate(train_dataloader):
            label_id = batch['label_id'].squeeze(1).to(device)
            seq_len = batch["seq_len"].to(device)
            segment_ids = batch['segment_ids'].to(device)
            # Sort the sequences by length in descending order
            # (required for packing padded sequences inside the model)
            seq_len, perm_idx = seq_len.sort(0, descending=True)
            label_id = label_id[perm_idx]
            segment_ids = segment_ids[perm_idx].transpose(0, 1)

            loss = model(segment_ids, seq_len, label_id)
            loss = loss.mean()  # DataParallel gathers one loss per GPU
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_batch += 1
            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = model.get_loss_acc(
                        segment_ids, seq_len, label_id)
                    loss_val, acc_val = evaluate(model, test_dataloader, device)
                if acc_val > best_acc_val:
                    # Save the best result so far
                    best_acc_val = acc_val
                    last_improved = total_batch
                    torch.save(model.state_dict(), "./output/model.bin")
                    improved_str = "*"
                else:
                    improved_str = ""
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                    + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))
                model.train()
            # Stop early if the validation accuracy has not improved
            # for require_improvement batches
            if total_batch - last_improved > require_improvement:
                print("No improvement for too long; stopping early.")
                flag = True
                break
        if flag:
            break
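
# The loop above calls evaluate() and get_time_dif(), which are defined
# elsewhere in the project. As a rough sketch only: the batch layout is
# copied from the training loop, and model.get_loss_acc is assumed to be
# reachable (via model.module when the model is wrapped in DataParallel).
from datetime import timedelta


def evaluate(model, dataloader, device):
    # Average loss and accuracy over a dataloader; the caller already
    # wraps this in torch.no_grad() and model.eval().
    total_loss, total_acc, n_batches = 0.0, 0.0, 0
    for batch in dataloader:
        label_id = batch['label_id'].squeeze(1).to(device)
        seq_len = batch["seq_len"].to(device)
        segment_ids = batch['segment_ids'].to(device)
        seq_len, perm_idx = seq_len.sort(0, descending=True)
        label_id = label_id[perm_idx]
        segment_ids = segment_ids[perm_idx].transpose(0, 1)
        loss, acc = model.get_loss_acc(segment_ids, seq_len, label_id)
        total_loss += float(loss)
        total_acc += float(acc)
        n_batches += 1
    return total_loss / n_batches, total_acc / n_batches


def get_time_dif(start_time):
    # Elapsed wall-clock time, rounded to whole seconds.
    return timedelta(seconds=int(round(time.time() - start_time)))
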
import time

import torch

best_acc = 0.0
best_epoch = 0
for epoch in range(EPOCH):
    start_time = time.time()
    for i, data in enumerate(train_loader):
        model.train()
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        # Forward pass
        outputs = model(inputs)
        # Compute the loss
        loss = criterion(outputs, labels)
        # Clear the gradients from the previous step
        optimizer.zero_grad()
        # Backward pass
        loss.backward()
        # Update the parameters
        optimizer.step()
    # Accuracy on the last training batch of the epoch
    accuracy = torch.mean((torch.argmax(outputs, 1) == labels.data).float())
    print('epoch{} loss:{:.4f} acc:{:.4f} time:{:.4f}'.format(
        epoch + 1, loss.item(), accuracy.item(), time.time() - start_time))
    # Validate after every epoch
    if epoch % 1 == 0:
        val_acc, val_loss = evaluate(model, val_loader)
        print('epoch{} val_loss:{:.4f} val_acc:{:.4f}'.format(
            epoch + 1, val_loss, val_acc))
        if val_acc > best_acc:
            best_epoch = epoch
            best_acc = val_acc
            torch.save(model.state_dict(), file_name)
print('best acc:', best_acc, 'best epoch:', best_epoch)
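
# Note that this snippet's evaluate() returns (accuracy, loss), in that
# order. A minimal sketch of a compatible implementation, assuming integer
# class labels and the same global criterion and device used above:
def evaluate(model, data_loader):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            total_loss += criterion(outputs, labels).item() * labels.size(0)
            correct += (torch.argmax(outputs, 1) == labels).sum().item()
            total += labels.size(0)
    return correct / total, total_loss / total  # (accuracy, loss)
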