def test_ax(self):
    """Check that plot_lift_curve returns the caller's Axes only when one is passed."""
    np.random.seed(0)
    model = LogisticRegression()
    model.fit(self.X, self.y)
    class_probabilities = model.predict_proba(self.X)

    _, supplied_ax = plt.subplots(1, 1)

    # No ``ax`` argument: the returned Axes must differ from the one above.
    returned_ax = plot_lift_curve(self.y, class_probabilities)
    assert supplied_ax is not returned_ax

    # Explicit ``ax`` argument: the very same Axes object must come back.
    returned_ax = plot_lift_curve(self.y, class_probabilities, ax=supplied_ax)
    assert supplied_ax is returned_ax
def log_lift_curve(y_true, y_pred, experiment=None, channel_name='metric_charts', prefix=''):
    """Creates lift curve chart and logs it to Neptune.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None,
            in which case the ``neptune`` module itself is used as the logging target.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that is prepended to ``channel_name`` when the chart is
            logged to Neptune.

    Raises:
        AssertionError: If ``y_pred`` is not 2-dimensional.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Create and log lift curve chart to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_lift_curve

            neptune.init()
            with neptune.create_experiment():
                log_lift_curve(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    # Kept as an assert (not a raise) so the exception type callers may
    # already rely on is unchanged.
    assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    fig, ax = plt.subplots()
    try:
        plt_metrics.plot_lift_curve(y_true, y_pred, ax=ax)
        send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    finally:
        # Close exactly the figure created here (not whichever figure happens
        # to be current), and do so even when plotting or uploading fails.
        plt.close(fig)
def lift_gain_curves(self):
    """Display the cumulative gains curve and then the lift curve for the test set.

    Reads ``self.y_test`` and ``self.y_pred_proba``; each chart is shown with
    its own ``plt.show()`` call. Nothing is returned.
    """
    # Build a two-column array [1 - p, p] from the single score vector.
    # NOTE(review): presumably self.y_pred_proba is the positive-class
    # probability -- confirm against where it is assigned.
    y_pred_proba_both_classes = np.column_stack(
        [1 - self.y_pred_proba, self.y_pred_proba])

    # The Axes returned by the plotting helpers were previously bound to
    # unused locals; the return values are not needed here.
    plot_cumulative_gain(self.y_test, y_pred_proba_both_classes,
                         title='Cumulative Gains Curve')
    plt.show()

    plot_lift_curve(self.y_test, y_pred_proba_both_classes,
                    title='Lift curve')
    plt.show()
def plot_analysis(combine, test_name, y_true, y_pred, y_proba, labels, verbose,
                  library, save=True, show=True, sessionid="testing", prefix=""):
    """Draw the classification diagnostic charts for one test run.

    Charts are produced in this order: confusion matrix, ROC curves,
    KS statistic (only when ``len(labels) < 3``), precision-recall curve,
    cumulative gain and lift curve (both only when ``len(labels) < 3``).

    Args:
        combine: When falsy, every chart is handed to ``save_show`` on its own
            under ``library + "/" + prefix``. When truthy, ``plt.subplot(2, 4, k)``
            is selected after each chart and a single ``save_show`` call is made
            at the end, after setting ``test_name`` as the super-title.
        test_name: Super-title used in combined mode.
        y_true: Ground-truth labels, forwarded to every plotting helper.
        y_pred: Predicted labels; only used for the confusion matrix.
        y_proba: Predicted scores, forwarded to every chart except the
            confusion matrix.
        labels: Collection of class labels; only its length is inspected.
        verbose: Not used by this function.
        library: First component of the output location passed to ``save_show``.
        save: Forwarded to ``save_show``.
        show: Forwarded to ``save_show``.
        sessionid: Forwarded to ``save_show``.
        prefix: Appended to ``library`` (after a ``/``) in non-combined mode.
    """

    def finish_panel(figure_name, slot):
        # Either emit the chart that was just drawn, or select the grid cell
        # ``slot + 1``; in both cases report the next slot index.
        if combine:
            plt.subplot(2, 4, slot + 1)
        else:
            # plt.gcf().set_size_inches(3.65,3.65)
            save_show(plt, library + "/" + prefix, sessionid, figure_name,
                      show, save, False, True, True, False)
        return slot + 1

    def shorten_legend(replacements):
        # Apply each (old, new) substitution, in order, to every legend entry
        # of the current axes.
        for entry in plt.gca().legend_.get_texts():
            for old, new in replacements:
                entry.set_text(entry.get_text().replace(old, new))

    slot = 0

    plt.rcParams.update({'font.size': 14})
    # TODO: Find a way to do this better
    pltmetrics.plot_confusion_matrix(y_true, y_pred)
    slot = finish_panel("confusion_matrix", slot)

    plt.rcParams.update({'font.size': 12})
    pltmetrics.plot_roc_curve(y_true, y_proba)
    shorten_legend((
        ("ROC curve of class", "class"),
        ("area =", "AUC: "),
        ("micro-average ROC curve", "micro-avg"),
        ("macro-average ROC curve", "macro-avg"),
    ))
    slot = finish_panel("roc_curves", slot)

    if len(labels) < 3:
        pltmetrics.plot_ks_statistic(y_true, y_proba)
        slot = finish_panel("ks_statistics", slot)

    pltmetrics.plot_precision_recall_curve(y_true, y_proba)
    shorten_legend((
        ("Precision-recall curve of class", "class"),
        ("area =", "AUC: "),
        ("micro-average Precision-recall curve", "micro-avg"),
        ("macro-average Precision-recall", "macro-avg"),
    ))
    slot = finish_panel("precision_recall_curve", slot)

    if len(labels) < 3:
        pltmetrics.plot_cumulative_gain(y_true, y_proba)
        slot = finish_panel("cumulative_gain", slot)

    if len(labels) < 3:
        pltmetrics.plot_lift_curve(y_true, y_proba)
        slot = finish_panel("lift_curve", slot)

    if combine:
        plt.suptitle(test_name)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        # NOTE(review): ``figname`` is not defined inside this function; unless
        # it exists as a module-level name this call raises NameError -- confirm.
        save_show(plt, library, sessionid, figname, show, save, True, analysis=True)
##############################################################################
### ROC curve
##############################################################################
def plot_roc_curve(fpr, tpr, label=None):
    """Plot ``tpr`` against ``fpr`` on the current pyplot axes.

    A dashed black diagonal from (0, 0) to (1, 1) is added, both axes are
    limited to [0, 1], and the axes are labelled. ``label`` is passed through
    as the legend label of the curve.
    """
    unit_interval = [0, 1]

    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot(unit_interval, unit_interval, "k--")
    plt.axis(unit_interval + unit_interval)

    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")


plot_roc_curve(fpr, tpr)
plt.show()


# ### Our high recall rate contributes to a fairly nice ROC curve.

# In[82]:


##############################################################################
### SVM lift curve
##############################################################################
# Class scores for the test features, as returned by the fitted classifier.
y_probas = clf.predict_proba(test_xn)

plot_lift_curve(test_y, y_probas)
plt.show()
def test_array_like(self):
    """Call plot_lift_curve with plain lists: integer, mixed and string labels."""
    label_variants = (
        [0, 1],
        [0, 'a'],
        ['b', 'a'],
    )
    for y_true in label_variants:
        # A fresh score list per call, exactly as in three separate calls.
        plot_lift_curve(y_true, [[0.8, 0.2], [0.2, 0.8]])
def test_string_classes(self):
    """Fit on string-converted labels and plot the lift curve against them."""
    np.random.seed(0)
    model = LogisticRegression()

    training_labels = convert_labels_into_string(self.y)
    model.fit(self.X, training_labels)

    class_probabilities = model.predict_proba(self.X)

    # Converted a second time, mirroring the two separate conversions
    # performed for fitting and for plotting.
    plotting_labels = convert_labels_into_string(self.y)
    plot_lift_curve(plotting_labels, class_probabilities)
# Plot the ROC curve and print the area under it
plt.figure()
false_positive_rate, recall, thresholds = roc_curve(target_bi1_valid, predictions_lgbm_prob)
roc_auc = auc(false_positive_rate, recall)
plt.title('Receiver Operating Characteristic (ROC)')
plt.plot(false_positive_rate, recall, 'b', label='AUC = %0.3f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # dashed red diagonal from (0, 0) to (1, 1)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.ylabel('Recall')
plt.xlabel('Fall-out (1-Specificity)')
plt.show()
print('AUC score:', roc_auc)

# Plot the confusion matrix as a heatmap and save it to disk
cm = confusion_matrix(target_bi1_valid, predictions_lgbm_01)
labels = ['0', '1']
# Only one figure is created here: an extra bare plt.figure() before this
# call left an empty figure behind that plt.show() would display as well.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, xticklabels=labels, yticklabels=labels, annot=True, fmt='d', cmap="Blues", vmin=0.2)
plt.title('Confusion Matrix')
plt.ylabel('True Class')
plt.xlabel('Predicted Class')
plt.savefig('CM_LightGBM.png', dpi=300)
plt.show()

# Lift curve
# NOTE(review): the second argument is a one-hot encoding of ``y_pred`` rather
# than per-class scores -- confirm this is intended; ``predictions_lgbm_prob``
# looks like the score vector used for the ROC curve above.
plot_lift_curve(target_bi1_valid, pd.get_dummies(y_pred).to_numpy())