def evaluate(self):
    with torch.no_grad():
        sentences, labels, length = zip(*self.dev_batch.__next__())
        _, paths = self.model(sentences)
        print("\teval")
        for tag in self.tags:
            f1_score(labels, paths, tag, self.model.tag_map)
def test_normalization(self):
    scaling_functions = {
        'min_max_scale': MinMaxScaler,
        'normalize': NormalizationScaler,
    }
    distance_funcs = {
        'euclidean': euclidean_distance,
        'gaussian': gaussian_kernel_distance,
        'inner_prod': inner_product_distance,
    }
    features, labels = generate_data_cancer()
    train_features, train_labels = features[:400], labels[:400]
    valid_features, valid_labels = features[400:460], labels[400:460]
    test_features, test_labels = features[460:], labels[460:]
    assert len(train_features) == len(train_labels) == 400
    assert len(valid_features) == len(valid_labels) == 60
    assert len(test_features) == len(test_labels) == 109

    for scaling_name, scaling_class in scaling_functions.items():
        for name, func in distance_funcs.items():
            # the scaler is stateful: it fits on the first call and reuses
            # those statistics on later calls
            scaler = scaling_class()
            train_features_scaled = scaler(train_features)
            valid_features_scaled = scaler(valid_features)

            # start below any reachable F1 so best_k is always set
            best_f1_score, best_k = -1, 0
            for k in [1, 3, 10, 20, 50]:
                model = KNN(k=k, distance_function=func)
                model.train(train_features_scaled, train_labels)
                train_f1_score = f1_score(
                    train_labels, model.predict(train_features_scaled))
                valid_f1_score = f1_score(
                    valid_labels, model.predict(valid_features_scaled))
                print(f'[part 2.2] {name}\t{scaling_name}\tk: {k:d}\t'
                      f'train: {train_f1_score:.5f}\t'
                      f'valid: {valid_f1_score:.5f}')
                if valid_f1_score > best_f1_score:
                    best_f1_score, best_k = valid_f1_score, k

            # use a fresh scaler here, since the training set changes
            scaler = scaling_class()
            combined_features_scaled = scaler(train_features + valid_features)
            test_features_scaled = scaler(test_features)
            model = KNN(k=best_k, distance_function=func)
            model.train(combined_features_scaled, train_labels + valid_labels)
            test_f1_score = f1_score(test_labels,
                                     model.predict(test_features_scaled))
            print()
            print(f'[part 2.2] {name}\t{scaling_name}\t'
                  f'best_k: {best_k:d}\ttest: {test_f1_score:.5f}')
            print()
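# Note: the f1_score used by test_normalization above (and by test_knn later
# in this section) takes two flat lists of binary labels. A minimal sketch of
# such a helper, assuming standard binary F1 over 0/1 labels; the project's
# actual utils.f1_score may differ, and binary_f1_sketch is a name chosen
# here for illustration:
def binary_f1_sketch(y_true, y_pred):
    # count true positives, false positives, and false negatives
    tp = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 1)
    fp = sum(1 for t, p in zip(y_true, y_pred) if t == 0 and p == 1)
    fn = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 0)
    if tp == 0:
        return 0.0
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)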
def evaluate(self):
    sentences, labels, length = zip(*self.dev_batch.__next__())
    if self.use_gpu:
        sentences = torch.tensor(sentences, dtype=torch.long).cuda()
    _, paths = self.model(sentences)
    print("\teval")
    for tag in self.tags:
        f1_score(labels, paths, tag, self.model.tag_map)
def main():
    distance_funcs = {
        'euclidean': Distances.euclidean_distance,
        'minkowski': Distances.minkowski_distance,
        'gaussian': Distances.gaussian_kernel_distance,
        'inner_prod': Distances.inner_product_distance,
        'cosine_dist': Distances.cosine_similarity_distance,
    }
    scaling_classes = {
        'min_max_scale': MinMaxScaler,
        'normalize': NormalizationScaler,
    }
    x_train, y_train, x_val, y_val, x_test, y_test = data_processing()
    print('x_train shape = ', x_train.shape)
    print('y_train shape = ', y_train.shape)
    print('x_val shape = ', x_val.shape)
    print('y_val shape = ', y_val.shape)
    print('x_test shape = ', x_test.shape)
    print('y_test shape = ', y_test.shape)

    tuner_without_scaling_obj = HyperparameterTuner()
    tuner_without_scaling_obj.tuning_without_scaling(distance_funcs, x_train,
                                                     y_train, x_val, y_val)
    print("**Without Scaling**")
    print("k =", tuner_without_scaling_obj.best_k)
    print("distance function =", tuner_without_scaling_obj.best_distance_function)
    print("f1_score =", tuner_without_scaling_obj.best_f1_score)

    pred = tuner_without_scaling_obj.best_model.predict(x_test)
    correct = sum(1 for p, t in zip(pred, y_test) if p == t)
    accuracy = float(correct) / len(pred)
    print("Accuracy is: ", accuracy)
    print("F1 Score: ", f1_score(y_test, pred))

    tuner_with_scaling_obj = HyperparameterTuner()
    tuner_with_scaling_obj.tuning_with_scaling(distance_funcs, scaling_classes,
                                               x_train, y_train, x_val, y_val)
    print("\n**With Scaling**")
    print("k =", tuner_with_scaling_obj.best_k)
    print("distance function =", tuner_with_scaling_obj.best_distance_function)
    print("scaler =", tuner_with_scaling_obj.best_scaler)
    print("f1_score =", tuner_with_scaling_obj.best_f1_score)

    pred_2 = tuner_with_scaling_obj.best_model.predict(x_test)
    correct_2 = sum(1 for p, t in zip(pred_2, y_test) if p == t)
    accuracy_2 = float(correct_2) / len(pred_2)
    print("Accuracy is: ", accuracy_2)
    print("F1 Score:", f1_score(y_test, pred_2))
def evaluate(self, Xt, Xc, y):
    """
    Evaluates the model's accuracy, precision, recall, F1 score and loss
    value on a dataset.

    :param Xt: Matrix containing title input.
    :param Xc: Matrix containing content input.
    :param y: Label vector.
    :return: Accuracy, lists containing each topic's precision, recall and
        F1 score followed by the macro-average across topics, and the
        model's loss value.
    """
    probs = self.model.predict([Xt, Xc], verbose=1, batch_size=100)
    preds = np.argmax(probs, axis=1)
    true = np.argmax(y, 1)
    acc = np.sum(np.equal(preds, true)) / np.size(true, 0)

    p, r, f1 = [], [], []
    for i in range(len(self.classes)):
        p.append(utils.precision(preds, true, i))
        r.append(utils.recall(preds, true, i))
        f1.append(utils.f1_score(p[i], r[i]))
    # drop undefined (None) entries before macro-averaging
    p2 = [x for x in p if x is not None]
    r2 = [x for x in r if x is not None]
    f2 = [x for x in f1 if x is not None]
    p.append(np.mean(p2))
    r.append(np.mean(r2))
    f1.append(np.mean(f2))

    print('\nCalculating loss...')
    self.compile()
    loss = self.model.evaluate([Xt, Xc], y, batch_size=100)[0]
    return acc, p, r, f1, loss
def xlnet_evaluate(self, sess, tag):
    trans = self.trans.eval()  # transition matrix (read here but unused below)
    batch = self.dev_batch.__next__()
    ntokens, tag_ids, inputs_ids, segment_ids, input_mask = zip(*batch)
    feed = {
        self.input_ids: inputs_ids,
        self.segment_ids: segment_ids,
        self.targets: tag_ids,
        self.input_mask: input_mask,
        self.dropout: 1,
    }
    pre_paths, acc, lengths = sess.run(
        [self.pred_ids, self.accuracy, self.length], feed_dict=feed)
    tar_paths = tag_ids
    recall, precision, f1 = f1_score(tar_paths, pre_paths, tag, self.tag_map)
    best = self.best_dev_f1.eval()
    if f1 > best:
        print("\tnew best f1:")
        print("\trecall {:.2f}\t precision {:.2f}\t f1 {:.2f}".format(
            recall, precision, f1))
        tf.assign(self.best_dev_f1, f1).eval()
def eval_fn(data_loader, model, device):
    model.eval()
    start = time.time()
    val_points = 0
    val_loss = 0
    val_f1 = 0
    with torch.no_grad():
        for bi, d in enumerate(data_loader):
            ids = d["ids"].to(device, dtype=torch.long)
            mask = d["mask"].to(device, dtype=torch.long)
            targets = d["targets"].to(device, dtype=torch.float)
            outputs = model(input_ids=ids, attention_mask=mask)
            val_points += len(targets)
            val_loss += loss_fn(outputs, targets)  # accumulates a tensor
            # weight the per-batch F1 by batch size for a proper average
            val_f1 += utils.f1_score(outputs, targets) * len(outputs)
    end = time.time()
    val_f1 /= val_points
    val_loss /= val_points
    logger.info(
        f'bi={bi}, Avg_val F1={val_f1.item()}, Avg_val loss={val_loss.item()}, '
        f'time={end - start}'
    )
    return val_f1, val_loss
def fit_KFold(in_dim, no_classes, model_fn, X, y, X_val, y_val, K=5):
    folds = list(
        StratifiedKFold(n_splits=K, shuffle=True, random_state=1).split(X, y))
    for i, (idx_tr, idx_val) in enumerate(folds):
        print(f'\nFold: {i}')
        data_tr = (X[idx_tr], y[idx_tr])
        data_val = (X[idx_val], y[idx_val])
        name = f'models/final_model_fold_{i}.h5'
        callbacks = get_callbacks(name, data_tr, data_val)
        model = model_fn(in_dim, no_classes)
        model, hist = fit_model(model, data_tr[0], data_tr[1],
                                data_val[0], data_val[1],
                                callbacks=callbacks, epochs=30)
        plot_learning(hist.history, i)
        auc = evaluate_model(model, X_val, y_val)
        print(f'AUC score for fold {i}: {auc}')
        preds = model.predict(X_val)
        # round probabilities to hard 0/1 labels;
        # use j to avoid shadowing the fold index i
        for j in range(len(preds)):
            preds[j][0] = round(preds[j][0])
        print(recall_score(y_val, preds))
        print(precision_score(y_val, preds))
        print(f1_score(y_val, preds))
def get_em_f1(preds):
    em, f1 = [], []
    for _id, pred in preds.items():
        answer = hotpot[_id]['answer']
        f1.append(f1_score(pred, answer))
        em.append(exact_match_score(pred, answer))
    return np.mean(em), np.mean(f1)
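# get_em_f1 above (and the reward function later in this section) scores
# predicted answer strings against gold answers. A minimal sketch of the
# token-overlap F1 in the style of the SQuAD evaluation script, assuming
# whitespace-tokenized answers; the project's f1_score may normalize text
# further, and token_f1_sketch is a name chosen here for illustration:
from collections import Counter

def token_f1_sketch(prediction, ground_truth):
    pred_tokens = prediction.split()
    gold_tokens = ground_truth.split()
    # multiset intersection counts shared tokens, with multiplicity
    num_same = sum((Counter(pred_tokens) & Counter(gold_tokens)).values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(gold_tokens)
    return 2 * precision * recall / (precision + recall)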
def KNearestNeighbors(Train, TrainLabels, Test, TestLabels):
    # Accuracy with the default ('auto') algorithm selection.
    # (An explicit sweep over K = 1..25 was explored previously; the
    # classifier's defaults are used here.)
    # Thanks to sklearn for the KNeighborsClassifier interface
    K_neighbor_new = utils.KNeighborsClassifier(algorithm='auto')
    K_neighbor_new.fit(Train, TrainLabels)
    predictions_new = K_neighbor_new.predict(Test)
    accuracy_train = K_neighbor_new.score(Train, TrainLabels)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions_new)
    fscore = utils.f1_score(TestLabels, predictions_new, average='weighted')
    print("K-nearest Neighbor: Train Accuracy- ", accuracy_train)
    print("K-nearest Neighbor: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('K-Nearest Neighbors', accuracy_train, accuracy, fscore)
def eval_metrics(self, embeddings, data):
    sampler, class_data = data
    batches = sampler.batch_generator()
    # get the results separated by relations
    all_scores, all_labels, _ = self.model.calc_score_by_relation(
        batches, embeddings, cuda=self.cuda)
    # ROC-AUC and PR-AUC (curve-based metrics), per relation
    _, roc_auc_each = roc_auc_score(all_scores, all_labels)
    _, pr_auc_each = precision_recall_score(all_scores, all_labels)
    roc_auc_dict = dict(
        zip(["roc_auc_{}".format(r) for r in self.model.relation_names],
            roc_auc_each))
    pr_auc_dict = dict(
        zip(["pr_auc_{}".format(r) for r in self.model.relation_names],
            pr_auc_each))
    # the classification task
    acc_val = accuracy(embeddings[-1][class_data].cpu(),
                       self.labels[class_data].cpu())
    f1_val = f1_score(embeddings[-1][class_data].cpu(),
                      self.labels[class_data].cpu())
    # return the metrics
    ret_dict = {
        "classification accuracy": acc_val.item(),
        "classification f1-score": f1_val.item(),
    }
    ret_dict.update(roc_auc_dict)
    ret_dict.update(pr_auc_dict)
    return ret_dict
def analyze(annotations, pair_edits_transforms=tuple()):
    results = {
        'transforms': [t.description for t in pair_edits_transforms],
        'pairs': {},
        'f1_a_mean': None,
        'f1_median': None,
    }
    f1s = []
    annotators_ids = sorted(annotations.keys())
    for a, b in itertools.combinations(annotators_ids, 2):
        edits_a, edits_b = annotations[a], annotations[b]
        for t in pair_edits_transforms:
            edits_a, edits_b = t(edits_a, edits_b)
        edits_a, edits_b = set(edits_a), set(edits_b)
        edits_ab = edits_a & edits_b
        na, nb, nab = len(edits_a), len(edits_b), len(edits_ab)
        # local named f1 to avoid shadowing utils.f1_score
        f1 = utils.f1_score(na, nb, nab)
        f1s.append(f1)
        results['pairs'][(a, b)] = [round(f1, 4), na, nb, nab]
    results['f1_a_mean'] = round(statistics.mean(f1s), 4)
    results['f1_median'] = round(statistics.median(f1s), 4)
    return results
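# analyze() above calls utils.f1_score(na, nb, nab) on set sizes; for two
# sets, F1 of one against the other reduces to the Dice coefficient. A
# plausible sketch of that helper, with count_f1_sketch a name chosen here
# for illustration:
def count_f1_sketch(na, nb, nab):
    # na, nb: sizes of the two annotators' edit sets;
    # nab: size of their intersection
    return 2 * nab / (na + nb) if na + nb else 0.0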
def test_arange():
    a = np.arange(4).reshape(2, 2)
    print(a)
    print(np.diag(a))
    print(np.einsum('i...i', a))
    print(euclidean_distance(p1_array, p2_array))
    print(f1_score(p1_array, p2_array))
def eval_epoch(sess, model, data_iterator, summary_writer):
    """Evaluate model for one epoch."""
    sess.run(tf.local_variables_initializer())
    num_iter = int(np.ceil(data_iterator.size / FLAGS.batch_size))
    epoch_loss = 0
    for step in range(num_iter):
        source, target, label = data_iterator.next_batch(FLAGS.batch_size)
        source_len = utils.sequence_length(source)
        target_len = utils.sequence_length(target)
        feed_dict = {model.x_source: source,
                     model.x_target: target,
                     model.labels: label,
                     model.source_seq_length: source_len,
                     model.target_seq_length: target_len,
                     model.decision_threshold: FLAGS.decision_threshold}
        (loss_value, epoch_accuracy,
         epoch_precision, epoch_recall) = sess.run([model.mean_loss,
                                                    model.accuracy[1],
                                                    model.precision[1],
                                                    model.recall[1]],
                                                   feed_dict=feed_dict)
        epoch_loss += loss_value
        if step % FLAGS.steps_per_checkpoint == 0:
            summary = sess.run(model.summaries, feed_dict=feed_dict)
            summary_writer.add_summary(summary,
                                       global_step=data_iterator.global_step)
    epoch_loss /= num_iter  # average over all batches, not the last step index
    epoch_f1 = utils.f1_score(epoch_precision, epoch_recall)
    print("  Testing:  Loss = {:.6f}, Accuracy = {:.4f}, "
          "Precision = {:.4f}, Recall = {:.4f}, F1 = {:.4f}"
          .format(epoch_loss, epoch_accuracy,
                  epoch_precision, epoch_recall, epoch_f1))
def evaluate(sess, source_sentences, target_sentences, references,
             source_sentences_ids, target_sentences_ids, probs_op,
             placeholders):
    """Evaluate BiRNN at the decision threshold value maximizing F1
    along the precision-recall curve.
    """
    data = [(source_sentences_ids[i], target_sentences_ids[j],
             1.0 if (i, j) in references else 0.0)
            for i, j in product(range(len(source_sentences)),
                                range(len(target_sentences)))]
    data_iterator = utils.TestingIterator(np.array(data, dtype=object))
    y_score = inference(sess, data_iterator, probs_op, placeholders)
    y_true = data_iterator.data[:, 2].astype(int)
    p, r, t = precision_recall_curve(y_true, y_score, pos_label=1)
    f1 = utils.f1_score(p, r)
    i = np.argmax(f1)
    # format arguments in the order the message names them:
    # threshold, then precision, recall, F1
    print("Evaluation metrics at decision threshold = {:.4f}\n"
          "Precision = {:.2f}, Recall = {:.2f}, F1 = {:.2f}\n"
          "-------------------------------------------------".format(
              t[i], 100 * p[i], 100 * r[i], 100 * f1[i]))
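# eval_epoch and evaluate above pass precision and recall (scalars, or numpy
# arrays from precision_recall_curve) straight to utils.f1_score, so that
# helper is presumably just the harmonic mean. A minimal element-wise sketch,
# guarding the p + r == 0 case; pr_f1_sketch is a name chosen here for
# illustration, and the same signature appears again in the cross-validation
# script at the end of this section:
import numpy as np

def pr_f1_sketch(precision, recall):
    p = np.asarray(precision, dtype=float)
    r = np.asarray(recall, dtype=float)
    denom = p + r
    # divide only where the denominator is positive; return 0 elsewhere
    return np.divide(2 * p * r, denom, out=np.zeros_like(denom),
                     where=denom > 0)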
def test(self):
    with torch.no_grad():
        id2vocab = {self.vocab[i]: i for i in self.vocab}
        print(len(id2vocab))
        f = open('./result/test_tag.json', 'w')
        # rows: the tags (component, disease&symptom, people);
        # columns: recall, precision, f1
        total_matrix = np.zeros([len(self.tags), 3])
        count = 0
        for batch in self.dev_manager.get_batch():
            count += 1
            print(count)
            sentences, labels, length = zip(*batch)
            strs = [[id2vocab[w] for w in s] for s in sentences]
            _, paths = self.model(sentences)
            for i in range(len(self.tags)):
                recall, precision, f1 = f1_score(labels, paths, self.tags[i],
                                                 self.model.tag_map)
                total_matrix[i][0] += recall
                total_matrix[i][1] += precision
                total_matrix[i][2] += f1
            entities = []
            for i in range(len(paths)):
                tmp = []
                for tag in self.tags:
                    tags = get_tags(paths[i], tag, self.tag_map)
                    tmp += format_result(tags, strs[i], tag)
                entities.append(tmp)
            for i in range(len(entities)):
                dic = {
                    'sentence': ''.join(strs[i]),
                    'entities': entities[i]
                }
                json.dump(dic, f, ensure_ascii=False)
        total_matrix /= count
        for i in range(len(self.tags)):
            print("{}\tcount\t{}\trecall {:.2f}\tprecision {:.2f}\tf1 {:.2f}"
                  .format(count, self.tags[i], total_matrix[i][0],
                          total_matrix[i][1], total_matrix[i][2]))
        f.close()
def reward(document, sampled_starts, sampled_ends, greedy_start, greedy_end,
           gold_start, gold_end):
    rewards = []
    gold_answer, baseline_answer = get_answer(document, gold_start, gold_end,
                                              int(greedy_start),
                                              int(greedy_end))
    baseline = f1_score(baseline_answer, gold_answer)
    em = exact_match_score(baseline_answer, gold_answer)
    for i in range(4):  # four sampled spans per example
        gold_answer, sample_answer = get_answer(document, gold_start,
                                                gold_end,
                                                int(sampled_starts[i]),
                                                int(sampled_ends[i]))
        f1 = f1_score(sample_answer, gold_answer)
        # self-critical reward: sample F1 relative to the greedy baseline
        normalized_reward = f1 - baseline
        rewards.append(normalized_reward)
    return rewards, baseline, em
def test_f1_score():
    from utils import f1_score
    result = []
    y_true = np.random.randint(low=0, high=4, size=(100))
    y_true[y_true <= 2] = 0
    y_true[y_true > 2] = 1
    y_pred = np.linspace(0, 1, num=100)
    y_pred[y_pred <= 0.5] = 0
    y_pred[y_pred > 0.5] = 1
    score = f1_score(y_true.flatten().tolist(), y_pred.flatten().tolist())
    result.append(score)
    result.append(
        f1_score(
            np.random.randint(0, high=2, size=(100,)).flatten().tolist(),
            np.random.randint(0, high=2, size=(100,)).flatten().tolist()))
    return ['[TEST f1_score],' + weights_to_string(result)]
def validate(net, criterion, loader, logger, device=None):
    for batch_x, batch_y in loader:
        with torch.no_grad():
            output = net(batch_x.to(device))
            loss = criterion(output, batch_y.to(device))
        logger.log('val_loss', loss.item())
        # local named f1 to avoid shadowing utils.f1_score
        f1 = utils.f1_score(_flatten(batch_y).cpu(), _flatten(output).cpu())
        logger.log('val_f1', f1)
def train_model(train_dataset, test_dataset, model, tag2id):
    # optimizer
    optimizer = optim.Adam(model.parameters())
    for epoch in range(10):
        total_loss = 0.   # loss accumulated over one epoch
        batch_count = 0.  # number of batches
        start = time.time()  # timer
        for batch in train_dataset.get_batch():
            model.zero_grad()
            # read one batch of data and convert it to tensors
            batch_sentences, batch_tags, batch_len = zip(*batch)
            batch_sentences_tensor = torch.tensor(batch_sentences,
                                                  dtype=torch.long)
            batch_tags_tensor = torch.tensor(batch_tags, dtype=torch.long)
            batch_len_tensor = torch.tensor(batch_len, dtype=torch.long)
            loss = model.neg_log_likelihood(batch_sentences_tensor,
                                            batch_tags_tensor,
                                            batch_len_tensor)
            total_loss += loss.item()
            batch_count += 1
            # backpropagation + optimizer step
            loss.backward()
            optimizer.step()
        # save model parameters
        torch.save(model.state_dict(), 'models/params.pkl')
        # training loss (averaged per batch)
        print("epoch: {}\tloss: {:.2f}\ttime: {:.1f} sec".format(
            epoch + 1, total_loss / batch_count, time.time() - start))
        # test-set performance
        print("\t** eval **")
        f1_score(test_dataset, model, tag2id)
def NaiveBayes(Train, TrainLabels, Test, TestLabels):
    # Thanks to sklearn for the GaussianNB interface
    naive_bayes = utils.GaussianNB()
    predictions = naive_bayes.fit(Train, TrainLabels).predict(Test)
    accuracy_train = naive_bayes.score(Train, TrainLabels)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions)
    fscore = utils.f1_score(TestLabels, predictions, average='weighted')
    print("Naive Bayes Classifier: Train Accuracy- ", accuracy_train)
    print("Naive Bayes Classifier: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('Naive Bayes (Baseline)', accuracy_train, accuracy, fscore)
def DiscriminantAnalysisLinear(Train, TrainLabels, Test, TestLabels):
    linear_fit = utils.LDA()
    linear_fit.fit(Train, TrainLabels)
    predictions = linear_fit.predict(Test)
    accuracy_train = linear_fit.score(Train, TrainLabels)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions)
    fscore = utils.f1_score(TestLabels, predictions, average='weighted')
    print("Linear Discriminant Analysis: Train Accuracy- ", accuracy_train)
    print("Linear Discriminant Analysis: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('Linear Discriminant Analysis', accuracy_train, accuracy, fscore)
def test_knn(self):
    features, labels = generate_data_cancer()
    train_features, train_labels = features[:400], labels[:400]
    valid_features, valid_labels = features[400:460], labels[400:460]
    test_features, test_labels = features[460:], labels[460:]
    assert len(train_features) == len(train_labels) == 400
    assert len(valid_features) == len(valid_labels) == 60
    assert len(test_features) == len(test_labels) == 109

    distance_funcs = {
        # 'euclidean': euclidean_distance,
        # 'gaussian': gaussian_kernel_distance,
        'inner_prod': inner_product_distance,
    }
    for name, func in distance_funcs.items():
        best_f1_score, best_k = -1, 0
        for k in [1]:
            model = KNN(k=k, distance_function=func)
            model.train(train_features, train_labels)
            train_f1_score = f1_score(train_labels,
                                      model.predict(train_features))
            valid_f1_score = f1_score(valid_labels,
                                      model.predict(valid_features))
            print(f'[part 2.1] {name}\tk: {k:d}\t'
                  f'train: {train_f1_score:.5f}\t'
                  f'valid: {valid_f1_score:.5f}')
            if valid_f1_score > best_f1_score:
                best_f1_score, best_k = valid_f1_score, k
        model = KNN(k=best_k, distance_function=func)
        model.train(train_features + valid_features,
                    train_labels + valid_labels)
        test_f1_score = f1_score(test_labels, model.predict(test_features))
        print()
        print(f'[part 2.1] {name}\tbest_k: {best_k:d}\t'
              f'test f1 score: {test_f1_score:.5f}')
        print()
def DiscriminantAnalysisQuadratic(Train, TrainLabels, Test, TestLabels):
    quadratic_fit = utils.QDA()
    quadratic_fit.fit(Train, TrainLabels)
    predictions = quadratic_fit.predict(Test)
    accuracy_train = quadratic_fit.score(Train, TrainLabels)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions)
    fscore = utils.f1_score(TestLabels, predictions, average='weighted')
    print("Quadratic Discriminant Analysis: Train Accuracy- ", accuracy_train)
    print("Quadratic Discriminant Analysis: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('Quadratic Discriminant Analysis', accuracy_train, accuracy, fscore)
def SupportVectorMachine(Train, TrainLabels, Test, TestLabels, d, c):
    # Thanks to sklearn for the SVC interface
    # (d and c are accepted for interface symmetry but unused with the
    # RBF defaults)
    svmPred = utils.SVC(gamma='auto', kernel='rbf')
    svmPred.fit(Train, TrainLabels)
    accuracy_train = svmPred.score(Train, TrainLabels)
    predictions = svmPred.predict(Test)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions)
    fscore = utils.f1_score(TestLabels, predictions, average='weighted')
    print("SVM with RBF: Train Accuracy- ", accuracy_train)
    print("SVM with RBF: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('Support Vector Machine (RBF Kernel)', accuracy_train, accuracy, fscore)
def evaluate(self, epoch, manager, add_scalar=True):
    print('Starting evaluation')
    all_origins = all_founds = all_rights = 0
    for tag in self.tags:
        origins = founds = rights = 0
        for batch in manager.get_batch():
            sentences, labels, length = zip(*batch)
            _, paths = self.model(sentences)
            origin, found, right = f1_score(labels, paths, tag,
                                            self.model.tag_map)
            origins += origin
            founds += found
            rights += right
        all_origins += origins
        all_founds += founds
        all_rights += rights
        recall = 0. if origins == 0 else (rights / origins)
        precision = 0. if founds == 0 else (rights / founds)
        f1 = 0. if recall + precision == 0 else (
            2 * precision * recall) / (precision + recall)
        print("\t{}\torigins:{}\t\t\tfounds:{}\t\t\trights:{}".format(
            tag, origins, founds, rights))
        print("\t\t\trecall:{}\tprecision:{}\tf1:{}".format(
            recall, precision, f1))
        if add_scalar:
            tag_epoch = tag + '-5epoch'
            writer.add_scalars(tag_epoch, {
                'recall': recall,
                'precision': precision,
                'f1': f1
            }, epoch)
    all_recall = 0. if all_origins == 0 else (all_rights / all_origins)
    all_precision = 0. if all_founds == 0 else (all_rights / all_founds)
    all_f1 = 0. if all_recall + all_precision == 0 else (
        2 * all_precision * all_recall) / (all_precision + all_recall)
    print("\tall_origins:{}\t\t\tall_founds:{}\t\t\tall_rights:{}".format(
        all_origins, all_founds, all_rights))
    print("\tall_recall:{}\tall_precision:{}\tall_f1:{}".format(
        all_recall, all_precision, all_f1))
    if add_scalar:
        writer.add_scalars(
            "ALL-5epoch", {
                'all_recall': all_recall,
                'all_precision': all_precision,
                'all_f1': all_f1
            }, epoch)
    print('Evaluation finished')
    return all_recall, all_precision, all_f1
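# Signature drift worth noting: the evaluate() above unpacks raw
# (origin, found, right) chunk counts from f1_score(labels, paths, tag,
# tag_map), while other NER snippets in this section unpack a
# (recall, precision, f1) triple from the same call; the helpers clearly
# come from different codebases. A hypothetical sketch of the
# count-returning variant, assuming get_tags yields hashable entity spans
# for the given tag (ner_counts_sketch is a name chosen for illustration):
def ner_counts_sketch(labels, paths, tag, tag_map):
    origin = found = right = 0
    for gold_seq, pred_seq in zip(labels, paths):
        gold = set(get_tags(gold_seq, tag, tag_map))
        pred = set(get_tags(pred_seq, tag, tag_map))
        origin += len(gold)         # gold entities of this tag
        found += len(pred)          # predicted entities of this tag
        right += len(gold & pred)   # correctly predicted entities
    return origin, found, right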
def eval_loop(data_loader, model, device):
    model.eval()
    exact_scores, f1_scores = [], []
    for _, data in enumerate(data_loader):
        # torch to GPU
        input_ids = data['input_ids'].to(device)
        segment_ids = data['segment_ids'].to(device)
        mask = data['mask'].to(device)
        start_targets = data['start_targets'][0]
        end_targets = data['end_targets'][0]
        # inference
        start_logit, end_logit = model(input_ids=input_ids, mask=mask,
                                       segment_ids=segment_ids)
        _, start_pred = torch.max(start_logit[0], 0)
        _, end_pred = torch.max(end_logit[0], 0)
        # recover if the predicted end token comes before the start token
        if end_pred < start_pred:
            start_pred, end_pred = end_pred, start_pred
        # best exact and F1 score for the prediction among viable answers
        best_exact = 0
        best_f1 = 0.
        for i in range(len(start_targets)):
            answer_span = range(int(start_targets[i]),
                                int(end_targets[i]) + 1)
            pred_span = range(int(start_pred), int(end_pred) + 1)
            if pred_span == answer_span:  # compare by value, not identity
                best_exact = 1
            f1 = f1_score(pred_span, answer_span)
            if f1 > best_f1:
                best_f1 = f1
        exact_scores.append(best_exact)
        f1_scores.append(best_f1)
    final_exact = sum(exact_scores) / len(exact_scores)
    final_f1 = sum(f1_scores) / len(f1_scores)
    return final_exact, final_f1
def Logistic_Regression(Train, TrainLabels, Test, TestLabels):
    # Thanks to sklearn for the LogisticRegression interface
    logistic_reg = utils.LogisticRegression(
        random_state=0, max_iter=100, multi_class='ovr',
        solver='lbfgs').fit(Train, TrainLabels)
    predictions = logistic_reg.predict(Test)
    accuracy_train = logistic_reg.score(Train, TrainLabels)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions)
    fscore = utils.f1_score(TestLabels, predictions, average='weighted')
    print("Logistic Regression: Train Accuracy - ", accuracy_train)
    print("Logistic Regression: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('Logistic Regression', accuracy_train, accuracy, fscore)
def RandomForestClassifierWithXGBoost(Train, TrainLabels, Test, TestLabels):
    # Thanks to sklearn for the RandomForest interface
    # (despite the name, only sklearn's RandomForestClassifier is used here)
    RFModel = utils.RandomForestClassifier(n_estimators=80, max_depth=100,
                                           random_state=0)
    RFModel.fit(Train, TrainLabels)
    predictions = RFModel.predict(Test)
    accuracy_train = RFModel.score(Train, TrainLabels)
    accuracy = utils.metrics.accuracy_score(TestLabels, predictions)
    fscore = utils.f1_score(TestLabels, predictions, average='weighted')
    print("Random Forest: Train Accuracy- ", accuracy_train)
    print("Random Forest: Test Accuracy - ", accuracy, ' F1-Score- ', fscore)
    print("====================================================================\n")
    return ('Random Forest', accuracy_train, accuracy, fscore)
def valid(dataloader, model, device, log):
    with torch.no_grad():
        model.eval()
        f1_all, acc_all, recall_all = [], [], []
        for i, batch in enumerate(dataloader):
            sig = batch['sig'].to(device)
            other = batch['other'].to(device)
            label = batch['label'].to(device)
            out = model(sig, other)  # use the model argument, not a global net
            f1, accuracy, recall = f1_score(out.cpu().data.numpy(),
                                            label.cpu().data.numpy())
            f1_all.append(f1)
            acc_all.append(accuracy)
            recall_all.append(recall)
            log.logging_flush(f' valid b:{i}/{len(dataloader)} ')
        f1_all, acc_all, recall_all = (np.mean(f1_all), np.mean(acc_all),
                                       np.mean(recall_all))
        log.logging(f'f1:{f1_all} acc:{acc_all} recall:{recall_all}')
        return f1_all
# Machine Learning models
model = Model()
kf = LabelKFold(categories, n_folds=5)  # shuffles internally
# kf = KFold(len(X), n_folds=5, shuffle=True, random_state=123)

# cross-validation
S = []
for train_index, test_index in kf:
    trainX, testX = X[train_index], X[test_index]
    trainY, testY = Y[train_index], Y[test_index]
    K = None  # slice limit; None means use all training samples
    # fit data
    model.fit(trainX[:K], trainY[:K],
              sample_weight=sample_weights[train_index][:K])
    # make prediction
    predY = model.predict(testX)
    # evaluation
    scores = weighted_precision_recall(testY, predY,
                                       sample_weight=sample_weights[test_index])
    S.append(scores)
    print("(Precision/Recall) T: (%.3f,%.3f) / F: (%.3f,%.3f) / N: (%.3f,%.3f)"
          % (scores[0], scores[1], scores[2], scores[3], scores[4], scores[5]))

S = np.array(S)
Tp, Tr = np.mean(S[:, 0]), np.mean(S[:, 1])
Fp, Fr = np.mean(S[:, 2]), np.mean(S[:, 3])
Np, Nr = np.mean(S[:, 4]), np.mean(S[:, 5])
print("TOTAL (Precision/Recall) T: (%.3f,%.3f) / F: (%.3f,%.3f) / N: (%.3f,%.3f)"
      % (Tp, Tr, Fp, Fr, Np, Nr))
print("F1 SCORE T:%.3f / F:%.3f / N:%.3f"
      % (f1_score(Tp, Tr), f1_score(Fp, Fr), f1_score(Np, Nr)))