def test_iba_error_y_score_prob_error(score_loss):
    """Expect ``AttributeError`` when calling the IBA-wrapped ``score_loss``.

    ``score_loss`` is supplied by the caller (the parametrisation is not part
    of this block); it is wrapped with ``alpha=0.5`` and ``squared=True`` and
    invoked on the binary predictions from ``make_prediction``.
    """
    truth, predicted, _ = make_prediction(binary=True)
    iba_decorator = make_index_balanced_accuracy(alpha=0.5, squared=True)
    wrapped_metric = iba_decorator(score_loss)
    with pytest.raises(AttributeError):
        wrapped_metric(truth, predicted)
def classification_report_imbalanced_values(
    y_true, y_pred, labels, target_names=None, sample_weight=None, digits=2, alpha=0.1
):
    """Return the raw numbers behind ``classification_report_imbalanced``.

    This mirrors ``imblearn.metrics.classification_report_imbalanced``
    (copied from version 0.4.3) with the formatting and report-generation
    code removed, so callers get values instead of a formatted string.
    Original implementation:
    https://github.com/scikit-learn-contrib/imbalanced-learn/blob/b861b3a8e3414c52f40a953f2e0feca5b32e7460/imblearn/metrics/_classification.py#L790

    Parameters
    ----------
    y_true, y_pred
        Ground-truth and predicted targets, forwarded to the metric functions.
    labels
        Labels to report on; converted with ``np.asarray``.
    target_names
        Display name per label (same order as ``labels``). Defaults to
        ``str(label)`` for every label.
    sample_weight
        Forwarded unchanged to every metric.
    digits
        Accepted for signature compatibility; not used by this function.
    alpha
        Weighting factor handed to ``make_index_balanced_accuracy``.

    Returns
    -------
    dict
        ``{"targets": {name: {...}}, "average": {...}}`` where every inner
        dict holds ``precision``, ``recall``, ``specificity``, ``f1``,
        ``geo_mean``, ``iba`` and ``support``. The averages are weighted by
        support; the averaged ``support`` entry is the total support.
    """
    labels = np.asarray(labels)
    if target_names is None:
        target_names = [str(label) for label in labels]

    # Every metric is computed per class with the same keyword arguments.
    per_class_kwargs = dict(labels=labels, average=None, sample_weight=sample_weight)

    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, **per_class_kwargs
    )
    specificity = specificity_score(y_true, y_pred, **per_class_kwargs)
    geo_mean = geometric_mean_score(y_true, y_pred, **per_class_kwargs)

    # Index balanced accuracy built on top of the geometric mean.
    iba_of_gmean = make_index_balanced_accuracy(alpha=alpha, squared=True)(
        geometric_mean_score
    )
    iba = iba_of_gmean(y_true, y_pred, **per_class_kwargs)

    # Insertion order fixes the key order of the emitted dictionaries.
    scores_by_name = {
        "precision": precision,
        "recall": recall,
        "specificity": specificity,
        "f1": f1,
        "geo_mean": geo_mean,
        "iba": iba,
    }

    targets = {}
    for position in range(len(labels)):
        row = {name: values[position] for name, values in scores_by_name.items()}
        row["support"] = support[position]
        targets[target_names[position]] = row

    average = {
        name: np.average(values, weights=support)
        for name, values in scores_by_name.items()
    }
    average["support"] = np.sum(support)

    return {"targets": targets, "average": average}
def test_iba_error_y_score_prob():
    """Expect ``AttributeError`` from four IBA-wrapped sklearn metrics.

    Average precision, Brier score loss, Cohen's kappa and ROC AUC are each
    wrapped with ``alpha=0.5, squared=True`` and called on binary predictions.
    """
    truth, predicted, _ = make_prediction(binary=True)
    rejected_metrics = (
        average_precision_score,
        brier_score_loss,
        cohen_kappa_score,
        roc_auc_score,
    )
    for metric in rejected_metrics:
        wrapped = make_index_balanced_accuracy(alpha=0.5, squared=True)(metric)
        assert_raises(AttributeError, wrapped, truth, predicted)
def test_iba_geo_mean_binary():
    """Check the IBA of the geometric mean on the binary prediction fixture."""
    truth, predicted, _ = make_prediction(binary=True)
    iba_decorator = make_index_balanced_accuracy(alpha=0.5, squared=True)
    observed = iba_decorator(geometric_mean_score)(truth, predicted)
    assert_allclose(observed, 0.5948, rtol=R_TOL)
def test_iba_geo_mean_binary():
    """IBA (alpha=0.5, squared) of the geometric mean must be close to 0.5948."""
    labels_true, labels_pred, _ = make_prediction(binary=True)
    wrap_with_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)
    gmean_iba = wrap_with_iba(geometric_mean_score)
    result = gmean_iba(labels_true, labels_pred)
    assert_allclose(result, 0.5948, rtol=R_TOL)
def test_iba_geo_mean_binary():
    """Check the IBA of the geometric mean against 0.54 (two decimals)."""
    truth, predicted, _ = make_prediction(binary=True)
    iba_decorator = make_index_balanced_accuracy(alpha=0.5, squared=True)
    observed = iba_decorator(geometric_mean_score)(truth, predicted)
    assert_almost_equal(observed, 0.54, 2)
def flat_iba(preds, labels):
    """Return the support-weighted index balanced accuracy of the geometric mean.

    Parameters
    ----------
    preds
        Array of per-class scores; the predicted class is the ``argmax``
        along axis 1 (assumes a 2-D ``(samples, classes)`` array — TODO
        confirm against callers).
    labels
        Array of true class labels; flattened before scoring.

    Returns
    -------
    The per-class IBA (``alpha=0.1``, ``squared=True``) of
    ``geometric_mean_score``, averaged with the class supports as weights.
    """
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()

    # The plain per-class geometric mean was previously computed here and
    # then discarded; only the IBA-corrected score is needed for the result.
    iba_gmean = make_index_balanced_accuracy(alpha=0.1, squared=True)(
        geometric_mean_score
    )
    iba = iba_gmean(labels_flat, pred_flat, average=None, sample_weight=None)

    # Only the support (class counts in the true labels) is used as weights.
    _, _, _, support = precision_recall_fscore_support(
        labels_flat, pred_flat, average=None, sample_weight=None
    )
    return np.average(iba, weights=support)
def test_iba_error_y_score_prob():
    """Test that an error is raised when a scoring metric takes parameters
    other than y_pred."""
    labels_true, labels_pred, _ = make_prediction(binary=True)
    wrap_with_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)
    for scoring_metric in (
        average_precision_score,
        brier_score_loss,
        cohen_kappa_score,
        roc_auc_score,
    ):
        iba_metric = wrap_with_iba(scoring_metric)
        assert_raises(AttributeError, iba_metric, labels_true, labels_pred)
def test_iba_sklearn_metrics():
    """Check IBA-wrapped sklearn metrics against their reference values.

    Accuracy, Jaccard similarity, precision and recall are each wrapped with
    ``alpha=0.5, squared=True`` and evaluated on the binary prediction
    fixture. Scores are computed floats, so they are compared with a relative
    tolerance; exact equality would fail on harmless rounding differences
    between platforms or library versions.
    """
    import math  # local import: the file-level import block is not visible here

    y_true, y_pred, _ = make_prediction(binary=True)

    reference_scores = [
        (accuracy_score, 0.54756),
        (jaccard_similarity_score, 0.54756),
        (precision_score, 0.65025),
        (recall_score, 0.41616000000000009),
    ]
    for metric, expected in reference_scores:
        iba_metric = make_index_balanced_accuracy(alpha=0.5, squared=True)(metric)
        score = iba_metric(y_true, y_pred)
        assert math.isclose(score, expected, rel_tol=1e-6), (
            "{}: got {!r}, expected {!r}".format(metric.__name__, score, expected)
        )
def test_iba_sklearn_metrics():
    """Check IBA-wrapped sklearn metrics against approximate reference values."""
    truth, predicted, _ = make_prediction(binary=True)
    reference_scores = (
        (accuracy_score, 0.54756),
        (jaccard_similarity_score, 0.54756),
        (precision_score, 0.65025),
        (recall_score, 0.41616000000000009),
    )
    for metric, expected in reference_scores:
        iba_metric = make_index_balanced_accuracy(alpha=0.5, squared=True)(metric)
        observed = iba_metric(truth, predicted)
        assert observed == approx(expected)
def test_iba_error_y_score_prob():
    """Expect ``AttributeError`` from four IBA-wrapped sklearn metrics.

    Average precision, Brier score loss, Cohen's kappa and ROC AUC are each
    wrapped with ``alpha=0.5, squared=True`` and called on binary predictions.
    """
    truth, predicted, _ = make_prediction(binary=True)
    rejected_metrics = (
        average_precision_score,
        brier_score_loss,
        cohen_kappa_score,
        roc_auc_score,
    )
    for metric in rejected_metrics:
        wrapped = make_index_balanced_accuracy(alpha=0.5, squared=True)(metric)
        with raises(AttributeError):
            wrapped(truth, predicted)
def test_iba_error_y_score_prob():
    """Each listed metric, once IBA-wrapped, must raise ``AttributeError``."""
    labels_true, labels_pred, _ = make_prediction(binary=True)
    wrap_with_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)
    for scoring_metric in (
        average_precision_score,
        brier_score_loss,
        cohen_kappa_score,
        roc_auc_score,
    ):
        iba_metric = wrap_with_iba(scoring_metric)
        with raises(AttributeError):
            iba_metric(labels_true, labels_pred)
def plot_learning_curve(self, estimator, title, X, y, train_sizes=np.linspace(0.1, 1.0, 5)):
    """Compute a learning curve, dump its raw scores and save the plot.

    The curve is scored with the index balanced accuracy of the geometric
    mean. Two files are written below ``self._file_path / "../data/results"``,
    both named after ``title`` with spaces replaced by underscores:
    ``<title>_values.csv`` (one row per train size and fold) and
    ``<title>.pdf`` (mean score with a one-standard-deviation band for the
    training and cross-validation scores).

    Parameters
    ----------
    estimator
        Estimator handed to ``learning_curve``.
    title
        Plot title; also the stem of the output file names.
    X, y
        Data forwarded to ``learning_curve``.
    train_sizes
        Train-set sizes forwarded to ``learning_curve``.
    """
    _, axes = plt.subplots(1, 1, figsize=(8, 6))
    axes.set_title(title)
    axes.set_xlabel("Training examples")
    axes.set_ylabel("Score")

    iba_scorer = make_scorer(make_index_balanced_accuracy()(geometric_mean_score))
    train_sizes, fit_scores, cv_scores = learning_curve(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        scoring=iba_scorer,
        verbose=1,
    )

    # One row per (train size, fold): repeat each size once per fold column.
    folds = fit_scores.shape[1]
    raw_values = pd.DataFrame({
        "train_size": np.repeat(train_sizes, folds),
        "train_score": fit_scores.reshape(-1),
        "test_score": cv_scores.reshape(-1),
    })
    raw_values.to_csv(
        self._file_path / Path(f"../data/results/{title.replace(' ', '_')}_values.csv"))

    series = (
        (fit_scores.mean(axis=1), fit_scores.std(axis=1), "r", "Training score"),
        (cv_scores.mean(axis=1), cv_scores.std(axis=1), "g", "Cross-validation score"),
    )

    # Plot learning curve: both bands first, then both mean lines.
    axes.grid()
    for centre, spread, colour, _ in series:
        axes.fill_between(
            train_sizes,
            centre - spread,
            centre + spread,
            alpha=0.1,
            color=colour,
        )
    for centre, _, colour, legend_label in series:
        axes.plot(train_sizes, centre, "o-", color=colour, label=legend_label)
    axes.legend(loc="best")

    plt.tight_layout()
    plt.savefig(self._file_path / Path(f"../data/results/{title.replace(' ', '_')}.pdf"))
def train_relevance_scoring(self):
    """Train and evaluate the relevance classifiers and persist the results.

    Three families of models are fitted on the labeled texts from
    ``self.loader``:

    1. bag-of-words Naive Bayes pipelines tuned with a grid search,
    2. default classifiers on mean document embeddings (no resampling),
    3. the same classifier types on ADASYN-resampled embeddings.

    Reports, confusion-matrix and learning-curve plots and pickled models are
    written below ``self._file_path / "../data/results"``. Nothing is
    returned.
    """
    X, y = self.loader.labeled_texts()
    X_train, X_test, y_train, y_test = self.train_test_split(X, y)
    # BOW models
    grid_search_parameters = {
        "tfidf__ngram_range": [(1, 1), (1, 3)],
        "tfidf__use_idf": (True, False),
    }
    for model_name, model in [
        ("complement", ComplementNB),
        ("multinomial", MultinomialNB),
    ]:
        # The vectorizer receives self._identity as tokenizer and does no
        # preprocessing or lowercasing of its own.
        pipeline = Pipeline([
            ("norm", TextNormalizer()),
            (
                "tfidf",
                TfidfVectorizer(tokenizer=self._identity,
                                preprocessor=None,
                                lowercase=False),
            ),
            ("clf", model(alpha=0.001)),
        ])
        # Candidates are ranked by the index balanced accuracy of the
        # geometric mean.
        gs_model = GridSearchCV(
            pipeline,
            grid_search_parameters,
            scoring=make_scorer(
                make_index_balanced_accuracy()(geometric_mean_score)),
            verbose=2,
        )
        start_time = time.time()
        gs_model = gs_model.fit(X_train, y_train)
        # Elapsed whole seconds converted to minutes, one decimal place.
        training_time = f"{int(time.time()-start_time)/60:.1f}"
        best_params = gs_model.best_params_
        predicted = gs_model.predict(X_test)
        report = classification_report_imbalanced(y_test, predicted)
        with open(
                self._file_path /
                Path(f"../data/results/relevance_{model_name}_report.txt"),
                "w",
        ) as f:
            f.write(
                str(best_params) + "\n\n" + report + "\n\n" +
                f"training time: {training_time} min.")
        self.plot_confusion_matrix(
            confusion_matrix(y_test, predicted),
            ["irrelevant", "relevant"],
            f"Confusion matrix, relevance scoring, {model_name} NBC",
        )
        # set_params mutates `pipeline` in place and returns it; the learning
        # curve is computed on the full X, y rather than the training split.
        self.plot_learning_curve(
            pipeline.set_params(**best_params),
            f"Learning curve, relevance scoring, {model_name} NBC",
            X,
            y,
        )
        with open(
                self._file_path /
                Path(f"../data/results/relevance_{model_name}_model.pickle"
                     ),
                "wb",
        ) as f:
            pickle.dump(gs_model.best_estimator_, f)
    # Embedding models without ADASYN
    embedder = MeanDocumentEmbedder()
    X_embedded = np.array(list(embedder.transform(X)))
    # Rebinds y_train / y_test to the split of the embedded data.
    (X_train_embedded, X_test_embedded, y_train,
     y_test) = self.train_test_split(X_embedded, y)
    for model_name, model in [
        ("logistic regression", LogisticRegression()),
        ("k-nearest neighbors", KNeighborsClassifier()),
        ("support vector classifier", SVC()),
        ("multi layer perceptron", MLPClassifier()),
    ]:
        start_time = time.time()
        model.fit(X_train_embedded, y_train)
        training_time = f"{int(time.time()-start_time)/60:.1f}"
        predicted = model.predict(X_test_embedded)
        report = classification_report_imbalanced(y_test, predicted)
        # Only a report and a confusion matrix are stored for these
        # baselines; no model pickle or learning curve is written here.
        with open(
                self._file_path / Path(
                    f"../data/results/relevance_no_adasyn_{model_name}_report.txt"
                ),
                "w",
        ) as f:
            f.write(report + "\n\n" +
                    f"training time: {training_time} min.")
        self.plot_confusion_matrix(
            confusion_matrix(y_test, predicted),
            ["irrelevant", "relevant"],
            f"Confusion matrix (no ADASYN), relevance scoring, {model_name}",
        )
    # Embedding models with ADASYN
    # Only the training split is resampled; the test split stays untouched.
    adasyn = ADASYN(random_state=13353)
    X_resample, y_resample = adasyn.fit_sample(X_train_embedded, y_train)
    for model_name, model in [
        ("logistic regression", LogisticRegression),
        ("k-nearest neighbors", KNeighborsClassifier),
        ("support vector classifier", SVC),
        ("multi layer perceptron", MLPClassifier),
    ]:
        # The persisted SVC is built with probability=True; every other
        # classifier uses its default constructor arguments.
        if model_name == "support vector classifier":
            clf = model(probability=True)
        else:
            clf = model()
        start_time = time.time()
        clf = clf.fit(X_resample, y_resample)
        training_time = f"{int(time.time()-start_time)/60:.1f}"
        predicted = clf.predict(X_test_embedded)
        report = classification_report_imbalanced(y_test, predicted)
        with open(
                self._file_path /
                Path(f"../data/results/relevance_{model_name}_report.txt"),
                "w",
        ) as f:
            f.write(report + "\n\n" +
                    f"training time: {training_time} min.")
        with open(
                self._file_path /
                Path(f"../data/results/relevance_{model_name}_model.pickle"
                     ),
                "wb",
        ) as f:
            pickle.dump(clf, f)
        self.plot_confusion_matrix(
            confusion_matrix(y_test, predicted),
            ["irrelevant", "relevant"],
            f"Confusion matrix, relevance scoring, {model_name}",
        )
        # The learning curve uses a fresh default-constructed estimator
        # (for SVC: without probability=True) on the resampled training data.
        self.plot_learning_curve(
            model(),
            f"Learning curve, relevance scoring, {model_name}",
            X_resample,
            y_resample,
        )
def _train_key_entity_classification(self, X, y, entity):
    """Grid-search Naive Bayes pipelines for one key-entity task and save results.

    Parameters
    ----------
    X, y
        Samples and labels; split with ``self.train_test_split`` using
        stratification on ``y``.
    entity
        Name of the entity type; used in output file names and plot titles.

    For each model the best estimator is pickled, a text report is written,
    and a confusion matrix and learning curve are plotted, all below
    ``self._file_path / "../data/results"``. Nothing is returned.
    """
    X_train, X_test, y_train, y_test = self.train_test_split(X,
                                                             y,
                                                             stratify=y)
    grid_search_parameters = {
        "tfidf__ngram_range": [(1, 1), (1, 3), (1, 4)],
        "tfidf__use_idf": (True, False),
        "clf__alpha": (0.01, 0.001),
    }
    for model_name, model in [
        ("Bernoulli", BernoulliNB),
        ("multinomial", MultinomialNB),
    ]:
        # The vectorizer receives self._identity as tokenizer and does no
        # preprocessing or lowercasing of its own.
        pipeline = Pipeline([
            ("norm", TextNormalizer()),
            (
                "tfidf",
                TfidfVectorizer(tokenizer=self._identity,
                                preprocessor=None,
                                lowercase=False),
            ),
            ("clf", model()),
        ])
        # Candidates are ranked by the index balanced accuracy of the
        # geometric mean.
        gs_model = GridSearchCV(
            pipeline,
            grid_search_parameters,
            scoring=make_scorer(
                make_index_balanced_accuracy()(geometric_mean_score)),
            verbose=2,
        )
        start_time = time.time()
        gs_model = gs_model.fit(X_train, y_train)
        # Elapsed whole seconds converted to minutes, one decimal place.
        training_time = f"{int(time.time()-start_time)/60:.1f}"
        best_params = gs_model.best_params_
        with open(
                self._file_path / Path(
                    f"../data/results/{entity}_{model_name}_model.pickle"),
                "wb",
        ) as f:
            pickle.dump(gs_model.best_estimator_, f)
        predicted = gs_model.predict(X_test)
        report = classification_report_imbalanced(y_test, predicted)
        with open(
                self._file_path /
                Path(f"../data/results/{entity}_{model_name}_report.txt"),
                "w",
        ) as f:
            f.write(
                str(best_params) + "\n\n" + report + "\n\n" +
                f"training time: {training_time} min.")
        self.plot_confusion_matrix(
            confusion_matrix(y_test, predicted),
            ["not key", "is key"],
            f"Confusion matrix, {entity} key entity, {model_name} NBC",
        )
        # set_params mutates `pipeline` in place and returns it; the learning
        # curve is computed on the full X, y rather than the training split.
        self.plot_learning_curve(
            pipeline.set_params(**best_params),
            f"Learning curve, {entity} key entity, {model_name} NBC",
            X,
            y,
        )
def test_iba_error_y_score_prob_error(score_loss):
    """The IBA-wrapped ``score_loss`` must raise ``AttributeError`` when called.

    ``score_loss`` is provided by the caller (its parametrisation is not part
    of this block).
    """
    labels_true, labels_pred, _ = make_prediction(binary=True)
    wrap_with_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)
    iba_metric = wrap_with_iba(score_loss)
    with pytest.raises(AttributeError):
        iba_metric(labels_true, labels_pred)
class Trainer:
    """Train, evaluate and persist the relevance and key-entity classifiers.

    All artefacts (text reports, PDF plots, CSV score dumps and pickled
    models) are written below ``_file_path / "../data/results"``.
    """

    # Created once at class-definition time and shared by all instances.
    loader = DataLoader()
    # IBA-wrapped geometric mean. The methods visible in this class rebuild
    # the same metric inline instead of referencing this attribute.
    iba = make_index_balanced_accuracy()(geometric_mean_score)
    # Module-level train_test_split with a fixed test size and seed.
    train_test_split = partial(train_test_split,
                               test_size=0.25,
                               random_state=13353)
    # Resolved directory that contains this module.
    _file_path = Path(__file__).parent.resolve()

    def train_relevance_scoring(self):
        """Train and evaluate the relevance classifiers and persist the results.

        Three families of models are fitted on the labeled texts from
        ``self.loader``:

        1. bag-of-words Naive Bayes pipelines tuned with a grid search,
        2. default classifiers on mean document embeddings (no resampling),
        3. the same classifier types on ADASYN-resampled embeddings.

        Nothing is returned.
        """
        X, y = self.loader.labeled_texts()
        X_train, X_test, y_train, y_test = self.train_test_split(X, y)
        # BOW models
        grid_search_parameters = {
            "tfidf__ngram_range": [(1, 1), (1, 3)],
            "tfidf__use_idf": (True, False),
        }
        for model_name, model in [
            ("complement", ComplementNB),
            ("multinomial", MultinomialNB),
        ]:
            # The vectorizer receives self._identity as tokenizer and does
            # no preprocessing or lowercasing of its own.
            pipeline = Pipeline([
                ("norm", TextNormalizer()),
                (
                    "tfidf",
                    TfidfVectorizer(tokenizer=self._identity,
                                    preprocessor=None,
                                    lowercase=False),
                ),
                ("clf", model(alpha=0.001)),
            ])
            # Candidates are ranked by the index balanced accuracy of the
            # geometric mean.
            gs_model = GridSearchCV(
                pipeline,
                grid_search_parameters,
                scoring=make_scorer(
                    make_index_balanced_accuracy()(geometric_mean_score)),
                verbose=2,
            )
            start_time = time.time()
            gs_model = gs_model.fit(X_train, y_train)
            # Elapsed whole seconds converted to minutes, one decimal place.
            training_time = f"{int(time.time()-start_time)/60:.1f}"
            best_params = gs_model.best_params_
            predicted = gs_model.predict(X_test)
            report = classification_report_imbalanced(y_test, predicted)
            with open(
                    self._file_path /
                    Path(f"../data/results/relevance_{model_name}_report.txt"),
                    "w",
            ) as f:
                f.write(
                    str(best_params) + "\n\n" + report + "\n\n" +
                    f"training time: {training_time} min.")
            self.plot_confusion_matrix(
                confusion_matrix(y_test, predicted),
                ["irrelevant", "relevant"],
                f"Confusion matrix, relevance scoring, {model_name} NBC",
            )
            # set_params mutates `pipeline` in place and returns it; the
            # learning curve is computed on the full X, y.
            self.plot_learning_curve(
                pipeline.set_params(**best_params),
                f"Learning curve, relevance scoring, {model_name} NBC",
                X,
                y,
            )
            with open(
                    self._file_path /
                    Path(f"../data/results/relevance_{model_name}_model.pickle"
                         ),
                    "wb",
            ) as f:
                pickle.dump(gs_model.best_estimator_, f)
        # Embedding models without ADASYN
        embedder = MeanDocumentEmbedder()
        X_embedded = np.array(list(embedder.transform(X)))
        # Rebinds y_train / y_test to the split of the embedded data.
        (X_train_embedded, X_test_embedded, y_train,
         y_test) = self.train_test_split(X_embedded, y)
        for model_name, model in [
            ("logistic regression", LogisticRegression()),
            ("k-nearest neighbors", KNeighborsClassifier()),
            ("support vector classifier", SVC()),
            ("multi layer perceptron", MLPClassifier()),
        ]:
            start_time = time.time()
            model.fit(X_train_embedded, y_train)
            training_time = f"{int(time.time()-start_time)/60:.1f}"
            predicted = model.predict(X_test_embedded)
            report = classification_report_imbalanced(y_test, predicted)
            # Only a report and a confusion matrix are stored for these
            # baselines; no model pickle or learning curve is written here.
            with open(
                    self._file_path / Path(
                        f"../data/results/relevance_no_adasyn_{model_name}_report.txt"
                    ),
                    "w",
            ) as f:
                f.write(report + "\n\n" +
                        f"training time: {training_time} min.")
            self.plot_confusion_matrix(
                confusion_matrix(y_test, predicted),
                ["irrelevant", "relevant"],
                f"Confusion matrix (no ADASYN), relevance scoring, {model_name}",
            )
        # Embedding models with ADASYN
        # Only the training split is resampled; the test split stays untouched.
        adasyn = ADASYN(random_state=13353)
        X_resample, y_resample = adasyn.fit_sample(X_train_embedded, y_train)
        for model_name, model in [
            ("logistic regression", LogisticRegression),
            ("k-nearest neighbors", KNeighborsClassifier),
            ("support vector classifier", SVC),
            ("multi layer perceptron", MLPClassifier),
        ]:
            # The persisted SVC is built with probability=True; every other
            # classifier uses its default constructor arguments.
            if model_name == "support vector classifier":
                clf = model(probability=True)
            else:
                clf = model()
            start_time = time.time()
            clf = clf.fit(X_resample, y_resample)
            training_time = f"{int(time.time()-start_time)/60:.1f}"
            predicted = clf.predict(X_test_embedded)
            report = classification_report_imbalanced(y_test, predicted)
            with open(
                    self._file_path /
                    Path(f"../data/results/relevance_{model_name}_report.txt"),
                    "w",
            ) as f:
                f.write(report + "\n\n" +
                        f"training time: {training_time} min.")
            with open(
                    self._file_path /
                    Path(f"../data/results/relevance_{model_name}_model.pickle"
                         ),
                    "wb",
            ) as f:
                pickle.dump(clf, f)
            self.plot_confusion_matrix(
                confusion_matrix(y_test, predicted),
                ["irrelevant", "relevant"],
                f"Confusion matrix, relevance scoring, {model_name}",
            )
            # The learning curve uses a fresh default-constructed estimator
            # (for SVC: without probability=True) on the resampled data.
            self.plot_learning_curve(
                model(),
                f"Learning curve, relevance scoring, {model_name}",
                X_resample,
                y_resample,
            )

    def train_key_entity_classifications(self):
        """Train the key-entity classifiers for date and count sentences."""
        X_date, y_date = self.loader.labeled_date_sentences()
        self._train_key_entity_classification(X_date, y_date, "date")
        X_count, y_count = self.loader.labeled_count_sentences()
        self._train_key_entity_classification(X_count, y_count, "count")

    def _train_key_entity_classification(self, X, y, entity):
        """Grid-search Naive Bayes pipelines for one key-entity task.

        Parameters
        ----------
        X, y
            Samples and labels; split with stratification on ``y``.
        entity
            Name of the entity type; used in output file names and titles.

        For each model the best estimator is pickled, a text report is
        written, and a confusion matrix and learning curve are plotted.
        Nothing is returned.
        """
        X_train, X_test, y_train, y_test = self.train_test_split(X,
                                                                 y,
                                                                 stratify=y)
        grid_search_parameters = {
            "tfidf__ngram_range": [(1, 1), (1, 3), (1, 4)],
            "tfidf__use_idf": (True, False),
            "clf__alpha": (0.01, 0.001),
        }
        for model_name, model in [
            ("Bernoulli", BernoulliNB),
            ("multinomial", MultinomialNB),
        ]:
            pipeline = Pipeline([
                ("norm", TextNormalizer()),
                (
                    "tfidf",
                    TfidfVectorizer(tokenizer=self._identity,
                                    preprocessor=None,
                                    lowercase=False),
                ),
                ("clf", model()),
            ])
            # Candidates are ranked by the index balanced accuracy of the
            # geometric mean.
            gs_model = GridSearchCV(
                pipeline,
                grid_search_parameters,
                scoring=make_scorer(
                    make_index_balanced_accuracy()(geometric_mean_score)),
                verbose=2,
            )
            start_time = time.time()
            gs_model = gs_model.fit(X_train, y_train)
            # Elapsed whole seconds converted to minutes, one decimal place.
            training_time = f"{int(time.time()-start_time)/60:.1f}"
            best_params = gs_model.best_params_
            with open(
                    self._file_path / Path(
                        f"../data/results/{entity}_{model_name}_model.pickle"),
                    "wb",
            ) as f:
                pickle.dump(gs_model.best_estimator_, f)
            predicted = gs_model.predict(X_test)
            report = classification_report_imbalanced(y_test, predicted)
            with open(
                    self._file_path /
                    Path(f"../data/results/{entity}_{model_name}_report.txt"),
                    "w",
            ) as f:
                f.write(
                    str(best_params) + "\n\n" + report + "\n\n" +
                    f"training time: {training_time} min.")
            self.plot_confusion_matrix(
                confusion_matrix(y_test, predicted),
                ["not key", "is key"],
                f"Confusion matrix, {entity} key entity, {model_name} NBC",
            )
            # set_params mutates `pipeline` in place and returns it; the
            # learning curve is computed on the full X, y.
            self.plot_learning_curve(
                pipeline.set_params(**best_params),
                f"Learning curve, {entity} key entity, {model_name} NBC",
                X,
                y,
            )

    def plot_confusion_matrix(
            self,
            cm,
            target_names,
            title,
    ):
        """
        Plot a sklearn confusion matrix (cm) and save it as a PDF.

        The file is named after ``title`` with spaces replaced by
        underscores. ``target_names`` may be ``None``, in which case no tick
        labels are set. The misclassification rate shown under the x-axis is
        one minus the trace of ``cm`` divided by its sum.

        Citation
        --------
        http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

        """
        misclass = 1 - np.trace(cm) / float(np.sum(cm))
        plt.figure(figsize=(8, 6))
        plt.imshow(cm, interpolation="nearest", cmap=plt.get_cmap("Blues"))
        plt.title(title)
        plt.colorbar()
        if target_names is not None:
            tick_marks = np.arange(len(target_names))
            plt.xticks(tick_marks, target_names, rotation=45)
            plt.yticks(tick_marks, target_names)
        # Cells above half the maximum count get white text, others black.
        thresh = cm.max() / 2
        for i, j in product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(
                j,
                i,
                "{:,}".format(cm[i, j]),
                horizontalalignment="right",
                color="white" if cm[i, j] > thresh else "black",
            )
        plt.ylabel("True label")
        plt.xlabel("Predicted label\nmisclass={:0.2f}".format(misclass))
        plt.tight_layout()
        plt.savefig(self._file_path /
                    Path(f"../data/results/{title.replace(' ', '_')}.pdf"))

    def plot_learning_curve(self,
                            estimator,
                            title,
                            X,
                            y,
                            train_sizes=np.linspace(0.1, 1.0, 5)):
        """
        Generate test and training learning curve.

        The curve is scored with the index balanced accuracy of the
        geometric mean. The raw per-fold scores are written to
        ``<title>_values.csv`` and the plot to ``<title>.pdf`` (spaces in
        ``title`` replaced by underscores).
        """
        _, ax = plt.subplots(1, 1, figsize=(8, 6))
        ax.set_title(title)
        ax.set_xlabel("Training examples")
        ax.set_ylabel("Score")
        # train_sizes is rebound to the sizes returned by learning_curve.
        train_sizes, train_scores, test_scores = learning_curve(
            estimator,
            X,
            y,
            train_sizes=train_sizes,
            scoring=make_scorer(
                make_index_balanced_accuracy()(geometric_mean_score)),
            verbose=1,
        )
        # One CSV row per (train size, fold): each size is repeated once per
        # score column so it lines up with the flattened score arrays.
        pd.DataFrame({
            "train_size":
            np.array([[size] * train_scores.shape[1]
                      for size in train_sizes]).reshape(-1),
            "train_score":
            train_scores.reshape(-1),
            "test_score":
            test_scores.reshape(-1),
        }).to_csv(
            self._file_path /
            Path(f"../data/results/{title.replace(' ', '_')}_values.csv"))
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)
        # Plot learning curve
        ax.grid()
        # Shaded bands span one standard deviation around each mean.
        ax.fill_between(
            train_sizes,
            train_scores_mean - train_scores_std,
            train_scores_mean + train_scores_std,
            alpha=0.1,
            color="r",
        )
        ax.fill_between(
            train_sizes,
            test_scores_mean - test_scores_std,
            test_scores_mean + test_scores_std,
            alpha=0.1,
            color="g",
        )
        ax.plot(train_sizes,
                train_scores_mean,
                "o-",
                color="r",
                label="Training score")
        ax.plot(
            train_sizes,
            test_scores_mean,
            "o-",
            color="g",
            label="Cross-validation score",
        )
        ax.legend(loc="best")
        plt.tight_layout()
        plt.savefig(self._file_path /
                    Path(f"../data/results/{title.replace(' ', '_')}.pdf"))

    def _identity(self, text):
        """Return ``text`` unchanged; passed as the TfidfVectorizer tokenizer."""
        return text
def classificationReportDict(trueY, predY, labels=None, targetNames=None,
                             sampleWeight=None, alpha=0.1):
    """Build a dictionary report of per-class and aggregate imbalanced metrics.

    Parameters
    ----------
    trueY, predY
        Ground-truth and predicted targets.
    labels
        Labels to report on. Defaults to ``unique_labels(trueY, predY)``;
        otherwise converted with ``np.asarray``.
    targetNames
        Display name per label (same order as ``labels``). Defaults to
        ``str(label)`` for every label.
    sampleWeight
        Forwarded to every metric.
    alpha
        Weighting factor handed to ``make_index_balanced_accuracy``.

    Returns
    -------
    dict
        One entry per target name plus ``'Weighted Avg'`` (weighted by
        support), ``'Macro Avg'`` (unweighted mean) and ``'Accuracy'``. Each
        metric entry holds ``Precision``, ``Recall``, ``F1``,
        ``Specificity``, ``GMean``, ``IbaGMean`` and ``Support``.
    """
    labels = unique_labels(trueY, predY) if labels is None else np.asarray(labels)
    if targetNames is None:
        targetNames = [str(label) for label in labels]

    # Every per-class metric is computed with the same keyword arguments.
    perClassKwargs = dict(labels=labels, average=None, sample_weight=sampleWeight)

    precision, recall, f1, support = precision_recall_fscore_support(
        trueY, predY, **perClassKwargs)
    specificity = specificity_score(trueY, predY, **perClassKwargs)
    gMean = geometric_mean_score(trueY, predY, **perClassKwargs)

    # Index balanced accuracy built on top of the geometric mean.
    ibaDecorator = make_index_balanced_accuracy(alpha=alpha, squared=True)
    ibaGMean = ibaDecorator(geometric_mean_score)(trueY, predY, **perClassKwargs)

    # Insertion order fixes the key order of the emitted dictionaries.
    scoresByName = {
        'Precision': precision,
        'Recall': recall,
        'F1': f1,
        'Specificity': specificity,
        'GMean': gMean,
        'IbaGMean': ibaGMean,
    }

    report = {}
    for position in range(len(labels)):
        row = {name: values[position] for name, values in scoresByName.items()}
        row['Support'] = support[position]
        report[targetNames[position]] = row

    weightedAvg = {name: np.average(values, weights=support)
                   for name, values in scoresByName.items()}
    weightedAvg['Support'] = np.sum(support)
    report['Weighted Avg'] = weightedAvg

    macroAvg = {name: np.average(values) for name, values in scoresByName.items()}
    macroAvg['Support'] = np.sum(support)
    report['Macro Avg'] = macroAvg

    report['Accuracy'] = accuracy_score(trueY, predY, normalize=True,
                                        sample_weight=sampleWeight)
    return report
def test_iba_sklearn_metrics(score, expected_score):
    """Check that the IBA-wrapped ``score`` approximately equals ``expected_score``.

    Both arguments are supplied by the caller (the parametrisation is not
    part of this block).
    """
    truth, predicted, _ = make_prediction(binary=True)
    iba_decorator = make_index_balanced_accuracy(alpha=0.5, squared=True)
    observed = iba_decorator(score)(truth, predicted)
    assert observed == pytest.approx(expected_score)
# The geometric mean corresponds to the square root of the product of the # sensitivity and specificity. Combining the two metrics should account for # the balancing of the dataset. # %% from imblearn.metrics import geometric_mean_score print(f"The geometric mean is {geometric_mean_score(y_test, y_pred):.3f}") # %% [markdown] # The index balanced accuracy can transform any metric to be used in # imbalanced learning problems. # %% from imblearn.metrics import make_index_balanced_accuracy alpha = 0.1 geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)(geometric_mean_score) print(f"The IBA using alpha={alpha} and the geometric mean: " f"{geo_mean(y_test, y_pred):.3f}") # %% alpha = 0.5 geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)(geometric_mean_score) print(f"The IBA using alpha={alpha} and the geometric mean: " f"{geo_mean(y_test, y_pred):.3f}")
# Relative tolerance used when comparing grid-search scores to references.
R_TOL = 1e-2


@pytest.fixture
def data():
    """Return the train/test split of a two-centre blob dataset.

    Both the blob generation and the split use ``random_state=0``. The value
    is whatever ``train_test_split`` returns; the test below unpacks it as
    ``X_train, X_test, y_train, y_test``.
    """
    X, y = make_blobs(random_state=0, centers=2)
    return train_test_split(X, y, random_state=0)


@pytest.mark.filterwarnings("ignore:Liblinear failed to converge")
@pytest.mark.parametrize(
    "score, expected_score",
    [(sensitivity_score, 0.92),
     (specificity_score, 0.92),
     (geometric_mean_score, 0.92),
     (make_index_balanced_accuracy()(geometric_mean_score), 0.85)]
)
@pytest.mark.parametrize("average",['macro', 'weighted', 'micro'])
def test_scorer_common_average(data, score, expected_score, average):
    """Grid-search a LinearSVC with ``score`` as scorer and check the best score.

    The metric is turned into a scorer with ``pos_label=None`` and the
    parametrised ``average``; the best cross-validated score must match
    ``expected_score`` within ``R_TOL`` (relative).
    """
    X_train, X_test, y_train, _ = data

    scorer = make_scorer(score, pos_label=None, average=average)
    grid = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]},
                        scoring=scorer, cv=3, iid=False)
    grid.fit(X_train, y_train).predict(X_test)

    assert grid.best_score_ == pytest.approx(expected_score, rel=R_TOL)


@pytest.mark.filterwarnings("ignore:Liblinear failed to converge")
@pytest.mark.parametrize(
@pytest.fixture
def data():
    """Return the train/test split of a two-centre blob dataset.

    Both the blob generation and the split use ``random_state=0``.
    """
    features, targets = make_blobs(random_state=0, centers=2)
    split = train_test_split(features, targets, random_state=0)
    return split


@pytest.mark.filterwarnings("ignore:Liblinear failed to converge")
@pytest.mark.parametrize(
    "score, expected_score",
    [
        (sensitivity_score, 0.92),
        (specificity_score, 0.92),
        (geometric_mean_score, 0.92),
        (make_index_balanced_accuracy()(geometric_mean_score), 0.85),
    ],
)
@pytest.mark.parametrize("average", ["macro", "weighted", "micro"])
def test_scorer_common_average(data, score, expected_score, average):
    """Grid-search a LinearSVC using ``score`` (with ``average``) as scorer.

    NOTE(review): the visible body fits and predicts but never compares
    ``grid.best_score_`` with ``expected_score``. An assertion may have been
    cut off from this view; confirm against the full file.
    """
    train_features, test_features, train_targets, _ = data

    scorer = make_scorer(score, pos_label=None, average=average)
    search = GridSearchCV(
        LinearSVC(random_state=0),
        param_grid={"C": [1, 10]},
        scoring=scorer,
        cv=3,
    )
    fitted_search = search.fit(train_features, train_targets)
    fitted_search.predict(test_features)
def test_imblearn_classification_scorers():
    """Check grid-search best scores for imblearn metrics used as scorers.

    Sensitivity, specificity, geometric mean and the IBA-wrapped geometric
    mean are each turned into a scorer four ways (macro, weighted and micro
    averaging with ``pos_label=None``, then ``pos_label=1``) and the best
    cross-validated score is compared with its reference within ``R_TOL``.
    """
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    # make a iba metric before a scorer
    geo_mean_iba = make_index_balanced_accuracy()(geometric_mean_score)

    macro = {'pos_label': None, 'average': 'macro'}
    weighted = {'pos_label': None, 'average': 'weighted'}
    micro = {'pos_label': None, 'average': 'micro'}
    positive_only = {'pos_label': 1}

    # (metric, make_scorer keyword arguments, expected best score)
    cases = [
        # sensitivity scorer
        (sensitivity_score, macro, 0.92),
        (sensitivity_score, weighted, 0.92),
        (sensitivity_score, micro, 0.92),
        (sensitivity_score, positive_only, 0.92),
        # specificity scorer
        (specificity_score, macro, 0.92),
        (specificity_score, weighted, 0.92),
        (specificity_score, micro, 0.92),
        (specificity_score, positive_only, 0.95),
        # geometric_mean scorer
        (geometric_mean_score, macro, 0.92),
        (geometric_mean_score, weighted, 0.92),
        (geometric_mean_score, micro, 0.92),
        (geometric_mean_score, positive_only, 0.92),
        # iba-wrapped geometric mean scorer
        (geo_mean_iba, macro, 0.85),
        (geo_mean_iba, weighted, 0.85),
        (geo_mean_iba, micro, 0.85),
        (geo_mean_iba, positive_only, 0.84),
    ]

    for metric, scorer_kwargs, expected in cases:
        scorer = make_scorer(metric, **scorer_kwargs)
        grid = GridSearchCV(LinearSVC(random_state=0),
                            param_grid={'C': [1, 10]},
                            scoring=scorer)
        grid.fit(X_train, y_train).predict(X_test)
        assert_allclose(grid.best_score_, expected, rtol=R_TOL)
############################################################################### # The geometric mean corresponds to the square root of the product of the # sensitivity and specificity. Combining the two metrics should account for # the balancing of the dataset. print('The geometric mean is {}'.format(geometric_mean_score( y_test, y_pred_bal))) ############################################################################### # The index balanced accuracy can transform any metric to be used in # imbalanced learning problems. alpha = 0.1 geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)( geometric_mean_score) print('The IBA using alpha = {} and the geometric mean: {}'.format( alpha, geo_mean( y_test, y_pred_bal))) alpha = 0.5 geo_mean = make_index_balanced_accuracy(alpha=alpha, squared=True)( geometric_mean_score) print('The IBA using alpha = {} and the geometric mean: {}'.format( alpha, geo_mean( y_test, y_pred_bal)))
def test_imblearn_classification_scorers():
    """Check imblearn metrics wrapped with ``make_scorer`` in a grid search.

    Each case builds a scorer from a metric and its keyword arguments, runs a
    ``GridSearchCV`` over ``C`` for a ``LinearSVC`` on a two-centre blob
    dataset, and compares ``best_score_`` with the expected value within
    ``R_TOL``.

    The estimator is seeded (``random_state=0``) so the cross-validated
    scores are reproducible from run to run.
    """
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # The IBA-corrected geometric mean is built once, before any scorer.
    geo_mean_iba = make_index_balanced_accuracy()(geometric_mean_score)

    # (metric, keyword arguments forwarded to make_scorer, expected best score)
    cases = [
        # sensitivity scorer
        (sensitivity_score, {'pos_label': None, 'average': 'macro'}, 0.92),
        (sensitivity_score, {'pos_label': None, 'average': 'weighted'}, 0.92),
        (sensitivity_score, {'pos_label': None, 'average': 'micro'}, 0.92),
        (sensitivity_score, {'pos_label': 1}, 0.92),
        # specificity scorer
        (specificity_score, {'pos_label': None, 'average': 'macro'}, 0.92),
        (specificity_score, {'pos_label': None, 'average': 'weighted'}, 0.92),
        (specificity_score, {'pos_label': None, 'average': 'micro'}, 0.92),
        (specificity_score, {'pos_label': 1}, 0.95),
        # geometric_mean scorer
        (geometric_mean_score, {'pos_label': None, 'average': 'macro'}, 0.92),
        (geometric_mean_score,
         {'pos_label': None, 'average': 'weighted'}, 0.92),
        (geometric_mean_score, {'pos_label': None, 'average': 'micro'}, 0.92),
        (geometric_mean_score, {'pos_label': 1}, 0.92),
        # iba metric made before the scorer
        (geo_mean_iba, {'pos_label': None, 'average': 'macro'}, 0.85),
        (geo_mean_iba, {'pos_label': None, 'average': 'weighted'}, 0.85),
        (geo_mean_iba, {'pos_label': None, 'average': 'micro'}, 0.85),
        (geo_mean_iba, {'pos_label': 1}, 0.84),
    ]

    for metric, scorer_kwargs, expected in cases:
        scorer = make_scorer(metric, **scorer_kwargs)
        grid = GridSearchCV(LinearSVC(random_state=0),
                            param_grid={'C': [1, 10]},
                            scoring=scorer)
        grid.fit(X_train, y_train).predict(X_test)
        # Name the failing case, since all cases share this one assertion.
        assert_allclose(
            grid.best_score_, expected, rtol=R_TOL,
            err_msg='metric={}, kwargs={}'.format(
                getattr(metric, '__name__', repr(metric)), scorer_kwargs))
def test_iba_sklearn_metrics(score, expected_score):
    """Check the value of a metric after index-balanced-accuracy wrapping.

    ``score`` is the metric callable to wrap and ``expected_score`` is the
    value the wrapped metric must return (up to ``pytest.approx`` tolerance)
    on the binary predictions from ``make_prediction``.

    NOTE(review): both arguments are presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    y_true, y_pred, _ = make_prediction(binary=True)

    # Wrap the raw metric with the IBA correction (alpha=0.5, squared).
    to_iba = make_index_balanced_accuracy(alpha=0.5, squared=True)
    iba_metric = to_iba(score)

    observed = iba_metric(y_true, y_pred)
    assert observed == pytest.approx(expected_score)