def test():
    # Configuration file
    cf = Config('./config.yaml')
    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data,
                                 batch_size=cf.batch_size,
                                 shuffle=True)

    # Model
    config = BertConfig("./output/pytorch_bert_config.json")
    model = BertForSequenceClassification(config, num_labels=cf.num_labels)
    model.load_state_dict(torch.load("./output/pytorch_model.bin"))

    # Move the model to the target device
    model.to(device)
    # Run on multiple GPUs in parallel if possible
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Evaluation loop
    start_time = time.time()
    data_len = len(test_dataloader)
    model.eval()
    y_pred = np.array([])
    y_test = np.array([])
    # for step, batch in enumerate(tqdm(test_dataloader, "batch", total=len(test_dataloader))):
    for step, batch in enumerate(test_dataloader):
        label_id = batch['label_id'].squeeze(1).to(device)
        word_ids = batch['word_ids'].to(device)
        segment_ids = batch['segment_ids'].to(device)
        word_mask = batch['word_mask'].to(device)
        with torch.no_grad():
            # Passing labels returns the loss; it is not used for the metrics below.
            loss = model(word_ids, segment_ids, word_mask, label_id)
            pred = get_model_labels(model, word_ids, segment_ids, word_mask)
        y_pred = np.hstack((y_pred, pred))
        y_test = np.hstack((y_test, label_id.to("cpu").numpy()))

    # Evaluation report
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(y_test, y_pred,
                                        target_names=get_labels('./data/label')))
    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test, y_pred)
    print(cm)
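The get_model_labels and get_labels helpers called above are defined elsewhere in the project. A minimal sketch of what they plausibly look like, assuming the label file holds one label name per line and that calling the model without labels returns logits (the pytorch-pretrained-bert convention); both names and behaviours here are assumptions, not the project's actual code:

# Hypothetical helpers assumed by test(); adapt to the real project code.
def get_labels(label_path):
    # One label name per line in the label file.
    with open(label_path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]


def get_model_labels(model, word_ids, segment_ids, word_mask):
    # Forward pass without labels returns logits; argmax gives the predicted class ids.
    logits = model(word_ids, segment_ids, word_mask)
    return torch.argmax(logits, dim=1).cpu().numpy()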
def create_bert_for_sequence_classification(self, config, input_ids,
                                            token_type_ids, input_mask,
                                            sequence_labels, token_labels,
                                            choice_labels):
    model = BertForSequenceClassification(config=config,
                                          num_labels=self.num_labels)
    model.eval()
    loss = model(input_ids, token_type_ids, input_mask, sequence_labels)
    logits = model(input_ids, token_type_ids, input_mask)
    outputs = {
        "loss": loss,
        "logits": logits,
    }
    return outputs
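A hedged example of how this test helper might be driven; the tensor shapes, the tester fixture, and the populated config are illustrative assumptions rather than the actual test harness:

# Illustrative call with dummy inputs; tester and config come from a hypothetical fixture.
batch_size, seq_length, vocab_size, num_labels = 2, 7, 99, 3
input_ids = torch.randint(0, vocab_size, (batch_size, seq_length))
token_type_ids = torch.randint(0, 2, (batch_size, seq_length))
input_mask = torch.ones(batch_size, seq_length, dtype=torch.long)
sequence_labels = torch.randint(0, num_labels, (batch_size,))

outputs = tester.create_bert_for_sequence_classification(
    config, input_ids, token_type_ids, input_mask,
    sequence_labels, token_labels=None, choice_labels=None)
assert outputs["logits"].shape == (batch_size, num_labels)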
torch.save(model.state_dict(),
           output_model_file + '_epoch_' + str(epoch) + '.bin')

# Validate
test_model = BertForSequenceClassification(bert_config,
                                           num_labels=len(y_columns))
# Parallelism: wrap before loading so the state-dict key names line up
# with a checkpoint saved from a DataParallel-wrapped model.
test_model = nn.DataParallel(test_model)
test_model.load_state_dict(
    torch.load(output_model_file + '_epoch_' + str(epoch) + '.bin'))
test_model.to(device)
for param in test_model.parameters():
    param.requires_grad = False
test_model.eval()

valid_preds = np.zeros((len(X_val)))
print(valid_preds.size)
valid = torch.utils.data.TensorDataset(
    torch.tensor(X_val, dtype=torch.long))
valid_loader = torch.utils.data.DataLoader(valid,
                                           batch_size=256,
                                           shuffle=False)
tk0 = tqdm(valid_loader)
for i, (x_batch,) in enumerate(tk0):
    pred = test_model(x_batch.to(device),
                      attention_mask=(x_batch > 0).to(device),
                      labels=None)
    valid_preds[i * 256:(i + 1) * 256] = pred[:, 0].detach().cpu().squeeze().numpy()
class TransformersClassifierHandler(BaseHandler, ABC):
    """
    Transformers text classifier handler class. This handler takes a text
    (string) as input and returns the predicted class label based on the
    serialized transformers checkpoint.
    """
    def __init__(self):
        super(TransformersClassifierHandler, self).__init__()
        self.initialized = False

    def initialize(self, ctx):
        properties = ctx.system_properties
        MODEL_DIR = properties.get("model_dir")
        self.device = torch.device("cuda:" + str(properties.get("gpu_id"))
                                   if torch.cuda.is_available() else "cpu")

        self.labelencoder = preprocessing.LabelEncoder()
        self.labelencoder.classes_ = np.load(
            os.path.join(MODEL_DIR, 'classes.npy'))

        config = BertConfig(os.path.join(MODEL_DIR, 'bert_config.json'))
        self.model = BertForSequenceClassification(
            config, num_labels=len(self.labelencoder.classes_))
        self.model.load_state_dict(
            torch.load(os.path.join(MODEL_DIR, 'pytorch_model.bin'),
                       map_location="cpu"))
        self.model.to(self.device)
        self.model.eval()

        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        self.softmax = torch.nn.Softmax(dim=-1)
        self.dataloader_num_workers = 0  # single-process data loading by default
        # self.batch_size = batch_size

        logger.debug(
            'Transformer model from path {0} loaded successfully'.format(
                MODEL_DIR))

        self.manifest = ctx.manifest
        self.initialized = True

    def preprocess(self, data):
        ids = []
        segment_ids = []
        input_masks = []
        MAX_LEN = 128
        for sen in data:
            text_tokens = self.tokenizer.tokenize(sen)
            # Truncate so that [CLS] + tokens + [SEP] fits within MAX_LEN.
            text_tokens = text_tokens[:MAX_LEN - 2]
            tokens = ["[CLS]"] + text_tokens + ["[SEP]"]
            temp_ids = self.tokenizer.convert_tokens_to_ids(tokens)
            input_mask = [1] * len(temp_ids)
            segment_id = [0] * len(temp_ids)
            padding = [0] * (MAX_LEN - len(temp_ids))

            temp_ids += padding
            input_mask += padding
            segment_id += padding

            ids.append(temp_ids)
            input_masks.append(input_mask)
            segment_ids.append(segment_id)

        ## Convert input lists to Torch tensors
        ids = torch.tensor(ids)
        segment_ids = torch.tensor(segment_ids)
        input_masks = torch.tensor(input_masks)

        validation_data = TensorDataset(ids, input_masks, segment_ids)
        validation_sampler = SequentialSampler(validation_data)
        validation_dataloader = DataLoader(
            validation_data,
            sampler=validation_sampler,
            batch_size=len(data),
            num_workers=self.dataloader_num_workers)
        return validation_dataloader

    def inference(self, validation_dataloader):
        """
        Predict the class of a text using a trained transformer model.
        """
        # NOTE: This makes the assumption that your model expects text to be tokenized
        # with "input_ids" and "token_type_ids" - which is true for some popular
        # transformer models, e.g. bert. If your transformer model expects different
        # tokenization, adapt this code to suit its expected input format.
        responses = []
        for batch in validation_dataloader:
            # Add batch to GPU
            batch = tuple(t.to(self.device) for t in batch)
            # Unpack the inputs from our dataloader; the segment ids are unused
            # because token_type_ids is passed as None below.
            b_input_ids, b_input_mask, b_segment_ids = batch
            with torch.no_grad():
                # Forward pass, calculate logit predictions
                logits = self.model(b_input_ids,
                                    token_type_ids=None,
                                    attention_mask=b_input_mask)
            for i in range(logits.size(0)):
                label_idx = [
                    self.softmax(logits[i]).detach().cpu().numpy().argmax()
                ]
                label_str = self.labelencoder.inverse_transform(label_idx)[0]
                responses.append(label_str)
        return responses

    def postprocess(self, inference_output):
        # TODO: Add any needed post-processing of the model predictions here
        return inference_output
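TorchServe invokes a single entry point on the handler module; a minimal sketch of how the methods above are typically chained, assuming the conventional module-level handle(data, context) function (the _service name is illustrative):

# Hypothetical module-level entry point wiring the handler into TorchServe.
_service = TransformersClassifierHandler()


def handle(data, context):
    if not _service.initialized:
        _service.initialize(context)
    if data is None:
        return None
    dataloader = _service.preprocess(data)
    predictions = _service.inference(dataloader)
    return _service.postprocess(predictions)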
optimizer.step()  # Now we can do an optimizer step
optimizer.zero_grad()
if lossf:
    lossf = 0.98 * lossf + 0.02 * loss.item()
else:
    lossf = loss.item()
tk0.set_postfix(loss=lossf)
avg_loss += loss.item() / len(train_loader)
avg_accuracy += torch.mean(
    ((torch.sigmoid(y_pred[:, 0]) > 0.5) == (y_batch[:, 0] > 0.5).to(device)
     ).to(torch.float)).item() / len(train_loader)
tq.set_postfix(avg_loss=avg_loss, avg_accuracy=avg_accuracy)

torch.save(model.state_dict(), output_model_file)

# Run validation
# The following 2 lines are not needed, but show how to reload the saved model for prediction
model = BertForSequenceClassification(bert_config, num_labels=len(y_columns))
model.load_state_dict(torch.load(output_model_file))
model.to(device)
for param in model.parameters():
    param.requires_grad = False
model.eval()

valid_preds = np.zeros((len(X_val)))
valid = torch.utils.data.TensorDataset(torch.tensor(X_val, dtype=torch.long))
valid_loader = torch.utils.data.DataLoader(valid, batch_size=32, shuffle=False)
tk0 = tqdm_notebook(valid_loader)
for i, (x_batch,) in enumerate(tk0):
    pred = model(x_batch.to(device),
                 attention_mask=(x_batch > 0).to(device),
                 labels=None)
    valid_preds[i * 32:(i + 1) * 32] = pred[:, 0].detach().cpu().squeeze().numpy()
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH)
x_test = convert_lines(x_test, MAX_LEN, tokenizer)
x_test_cuda = torch.tensor(x_test, dtype=torch.long).cuda()
test_data = torch.utils.data.TensorDataset(x_test_cuda)
test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False)

## load fine-tuned model
bert_config = BertConfig(
    '../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/bert_config.json')
net = BertForSequenceClassification(bert_config, num_labels=6)
net.load_state_dict(torch.load("../input/bert-model3/bert_pytorch_v3.pt"))
net.cuda()

## inference
net.eval()
result_1 = list()
with torch.no_grad():
    for (x_batch,) in test_loader:
        y_pred = net(x_batch)
        y_pred = torch.sigmoid(y_pred.cpu()).numpy()[:, 0]
        result_1.extend(y_pred)
result_1 = np.array(result_1)

net = BertForSequenceClassification(bert_config, num_labels=6)
net.load_state_dict(torch.load("../input/bert-model4/bert_pytorch_v4.pt"))
net.cuda()

## inference
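convert_lines is defined elsewhere in the kernel; a rough sketch under the assumption that it tokenizes each text, truncates it so that [CLS] and [SEP] fit within MAX_LEN, and zero-pads to a fixed length:

# Assumed behaviour of convert_lines; the real kernel helper may differ in details.
def convert_lines(texts, max_seq_length, tokenizer):
    all_ids = []
    for text in texts:
        tokens = tokenizer.tokenize(text)[:max_seq_length - 2]
        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        ids = tokenizer.convert_tokens_to_ids(tokens)
        ids += [0] * (max_seq_length - len(ids))  # zero-pad to max_seq_length
        all_ids.append(ids)
    return np.array(all_ids)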
def train_unfixed():
    # Configuration file
    cf = Config('./config.yaml')
    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data,
                                  batch_size=cf.batch_size,
                                  shuffle=True)
    # Test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data,
                                 batch_size=cf.batch_size,
                                 shuffle=True)

    # Model
    config = BertConfig("./output/pytorch_bert_config.json")
    model = BertForSequenceClassification(config, num_labels=cf.num_labels)
    model.load_state_dict(torch.load("./output/pytorch_model.bin"))

    # Optimizer: BertAdam over all (unfrozen) parameters
    for param in model.parameters():
        param.requires_grad = True
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    num_train_optimization_steps = int(
        len(train_data) / cf.batch_size) * cf.epoch
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=cf.lr,
                         t_total=num_train_optimization_steps)

    # Move the model to the target device
    model.to(device)
    # Run on multiple GPUs in parallel if possible
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Training
    start_time = time.time()
    total_batch = 0  # total number of batches seen
    best_acc_val = 0.0  # best validation accuracy
    last_improved = 0  # batch at which the last improvement happened
    require_improvement = 1500  # stop early after 1500 batches without improvement

    # Get the current validation accuracy
    model.eval()
    _, best_acc_val = evaluate(model, test_dataloader, device)

    flag = False
    model.train()
    for epoch_id in range(cf.epoch):
        print("Epoch %d" % epoch_id)
        for step, batch in enumerate(
                tqdm(train_dataloader,
                     desc="batch",
                     total=len(train_dataloader))):
            # for step, batch in enumerate(train_dataloader):
            label_id = batch['label_id'].squeeze(1).to(device)
            word_ids = batch['word_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            word_mask = batch['word_mask'].to(device)

            loss = model(word_ids, segment_ids, word_mask, label_id)
            if loss.dim() > 0:
                # Under DataParallel the per-GPU losses come back as a vector.
                loss = loss.mean()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_batch += 1
            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = get_model_loss_acc(
                        model, word_ids, segment_ids, word_mask, label_id)
                    loss_val, acc_val = evaluate(model, test_dataloader,
                                                 device)
                if acc_val > best_acc_val:
                    # Save the best result so far (unwrap DataParallel if needed)
                    best_acc_val = acc_val
                    last_improved = total_batch
                    model_to_save = model.module if hasattr(model, "module") else model
                    torch.save(model_to_save.state_dict(),
                               "./output/pytorch_model.bin")
                    with open("./output/pytorch_bert_config.json", 'w') as f:
                        f.write(model_to_save.config.to_json_string())
                    improved_str = "*"
                else:
                    improved_str = ""
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))
                model.train()

            if total_batch - last_improved > require_improvement:
                print("No improvement for a long time; stopping early")
                flag = True
                break
        if flag:
            break
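evaluate and get_model_loss_acc are project helpers not shown in this excerpt. A hedged sketch of what they plausibly compute, assuming the model returns the loss when labels are passed and logits otherwise:

# Hypothetical evaluation helpers; names match the calls above, bodies are assumptions.
def get_model_loss_acc(model, word_ids, segment_ids, word_mask, label_id):
    loss = model(word_ids, segment_ids, word_mask, label_id)
    logits = model(word_ids, segment_ids, word_mask)
    acc = (torch.argmax(logits, dim=1) == label_id).float().mean()
    return loss.mean().item(), acc.item()


def evaluate(model, dataloader, device):
    # Average loss and accuracy over the whole dataloader.
    total_loss, total_acc, n_batches = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in dataloader:
            label_id = batch['label_id'].squeeze(1).to(device)
            word_ids = batch['word_ids'].to(device)
            segment_ids = batch['segment_ids'].to(device)
            word_mask = batch['word_mask'].to(device)
            loss, acc = get_model_loss_acc(model, word_ids, segment_ids,
                                           word_mask, label_id)
            total_loss += loss
            total_acc += acc
            n_batches += 1
    return total_loss / n_batches, total_acc / n_batches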