# Third-party imports assumed by both eval_model variants below. LoggerHelper,
# DiDiDatasetAudio and f1_score (a project helper computing F1 from a
# precision/recall pair) are project-local and imported from the repo.
import numpy as np
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import precision_recall_curve, roc_auc_score, log_loss


def eval_model(cls, master_gpu_id, model, eval_dataset, eval_batch_size=1, use_cuda=False, num_workers=1):
    model.eval()
    eval_dataloader = DataLoader(dataset=eval_dataset,
                                 pin_memory=use_cuda,
                                 batch_size=eval_batch_size,
                                 num_workers=num_workers,
                                 shuffle=False)

    predicted_probs = []
    true_labels = []

    batch_count = 1
    for batch in tqdm(eval_dataloader, unit="batch", ncols=100, desc="Evaluating process: "):
        # Move the batch tensors to the master GPU when CUDA is enabled.
        labels = batch["label"].cuda(master_gpu_id) if use_cuda and master_gpu_id is not None else batch["label"]
        tokens = batch["tokens"].cuda(master_gpu_id) if use_cuda and master_gpu_id is not None else batch["tokens"]
        segment_ids = batch["segment_ids"].cuda(master_gpu_id) if use_cuda and master_gpu_id is not None \
            else batch["segment_ids"]
        attention_mask = batch["attention_mask"].cuda(master_gpu_id) if use_cuda and master_gpu_id is not None \
            else batch["attention_mask"]

        with torch.no_grad():
            output = model(tokens, segment_ids, attention_mask)

        # Convert the model output to per-class probabilities.
        output = torch.softmax(output, dim=1).cpu().tolist()
        # Keep only the positive-class probability.
        output = np.array(output)[:, 1]
        # Append this batch's positive-class predictions to the global prediction list.
        predicted_probs.extend(output.tolist())
        # Append this batch's ground-truth labels to the global label list.
        true_labels.extend(labels.tolist())

        LoggerHelper.info("Batch: " + str(batch_count))
        batch_count += 1

    predicted_probs = [round(prob, 2) for prob in predicted_probs]
    precision, recall, _thresholds = precision_recall_curve(true_labels, predicted_probs)
    auc = roc_auc_score(true_labels, predicted_probs)
    logloss = log_loss(true_labels, predicted_probs)

    for i in range(len(_thresholds)):
        log_str_th = 'VAL => Thresholds: {0:>2}, Precision: {1:>7.2%}, Recall: {2:>7.2%}, F1: {3:>7.2%}'.format(
            _thresholds[i], precision[i], recall[i], f1_score(precision[i], recall[i]))
        LoggerHelper.info(log_str_th)

    LoggerHelper.info("AUC: " + str(auc))
    LoggerHelper.info("Logloss: " + str(logloss))
    LoggerHelper.info("Total Evaluation Samples: " + str(len(true_labels)))
    LoggerHelper.info("Total Positive Evaluation Samples: " + str(len([x for x in true_labels if x == 1])))
    LoggerHelper.info("Total Negative Evaluation Samples: " + str(len([x for x in true_labels if x == 0])))

    return
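# The per-threshold logging in eval_model calls f1_score(precision, recall), which does
# not match sklearn's f1_score(y_true, y_pred) signature, so f1_score is assumed to be a
# small project-local helper. A minimal sketch of such a helper, assuming F1 is the
# harmonic mean of a single precision/recall pair (illustrative only, not the repo's code):
def f1_score(precision, recall, eps=1e-10):
    """Harmonic mean of one precision/recall pair; eps guards against division by zero."""
    return 2 * precision * recall / (precision + recall + eps)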
def eval_model(cls, master_gpu_id, model, eval_dataset, eval_batch_size=1, use_cuda=False, num_workers=1):
    model.eval()
    eval_dataloader = DataLoader(dataset=eval_dataset,
                                 pin_memory=use_cuda,
                                 batch_size=eval_batch_size,
                                 num_workers=num_workers,
                                 shuffle=False,
                                 collate_fn=DiDiDatasetAudio.collate)

    predicted_probs = []
    true_labels = []

    batch_count = 1
    for batch in tqdm(eval_dataloader, unit="batch", ncols=100, desc="Evaluating process: "):
        # Unpack the collated batch and move tensors to the master GPU when CUDA is enabled.
        audio_inputs, label_inputs, _, _ = batch
        label_inputs = label_inputs.cuda(master_gpu_id) if use_cuda and master_gpu_id is not None else label_inputs
        audio_inputs, audio_length = audio_inputs
        audio_inputs = audio_inputs.cuda(master_gpu_id) if use_cuda and master_gpu_id is not None else audio_inputs
        audio_length = audio_length.cuda(master_gpu_id) if use_cuda and master_gpu_id is not None else audio_length

        with torch.no_grad():
            main_output = model(audio_inputs, audio_length)

        # Convert the model output to per-class probabilities.
        main_output = torch.softmax(main_output, dim=1).cpu().tolist()
        # Keep only the positive-class probability.
        prob = np.array(main_output)[:, 1]
        # Append this batch's positive-class predictions to the global prediction list.
        predicted_probs.extend(prob.tolist())
        # Append this batch's ground-truth labels to the global label list.
        true_labels.extend(label_inputs.tolist())

        LoggerHelper.info("Batch: " + str(batch_count))
        batch_count += 1

    predicted_probs = [round(prob, 2) for prob in predicted_probs]
    precision, recall, _thresholds = precision_recall_curve(true_labels, predicted_probs)
    auc = roc_auc_score(true_labels, predicted_probs)
    logloss = log_loss(true_labels, predicted_probs)

    for i in range(len(_thresholds)):
        log_str_th = 'VAL => Thresholds: {0:>2}, Precision: {1:>7.2%}, Recall: {2:>7.2%}, F1: {3:>7.2%}'.format(
            _thresholds[i], precision[i], recall[i], f1_score(precision[i], recall[i]))
        LoggerHelper.info(log_str_th)

    LoggerHelper.info("AUC: " + str(auc))
    LoggerHelper.info("Logloss: " + str(logloss))

    return
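# The per-threshold loop in both eval_model variants is typically used to pick an
# operating threshold. A minimal sketch of selecting the threshold that maximizes F1
# from the same precision_recall_curve outputs (an illustrative assumption, not part
# of the original code; best_f1_threshold is a hypothetical helper name):
def best_f1_threshold(true_labels, predicted_probs, eps=1e-10):
    """Return (threshold, f1) for the threshold with the highest F1 score."""
    precision, recall, thresholds = precision_recall_curve(true_labels, predicted_probs)
    # precision and recall have one more entry than thresholds; drop the trailing pair.
    f1 = 2 * precision[:-1] * recall[:-1] / (precision[:-1] + recall[:-1] + eps)
    best = int(np.argmax(f1))
    return thresholds[best], f1[best]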