def test_array_like(self): ax = skplt.plot_ks_statistic([0, 1], [[0.8, 0.2], [0.2, 0.8]])
"""An example showing the plot_ks_statistic method used by a scikit-learn classifier""" from __future__ import absolute_import import matplotlib.pyplot as plt from scikitplot import classifier_factory from sklearn.linear_model import LogisticRegression from sklearn.datasets import load_breast_cancer as load_data X, y = load_data(return_X_y=True) lr = classifier_factory(LogisticRegression()) lr.plot_ks_statistic(X, y, random_state=1) plt.show() # Using the more flexible functions API from scikitplot import plotters as skplt lr = LogisticRegression() lr = lr.fit(X, y) probas = lr.predict_proba(X) skplt.plot_ks_statistic(y_true=y, y_probas=probas) plt.show()
def plot_ks_statistic(clf, X, y, title='KS Statistic Plot', do_cv=True, cv=None, shuffle=True, random_state=None, ax=None): """Generates the KS Statistic plot for a given classifier and dataset. Args: clf: Classifier instance that implements "fit" and "predict_proba" methods. X (array-like, shape (n_samples, n_features)): Training vector, where n_samples is the number of samples and n_features is the number of features. y (array-like, shape (n_samples) or (n_samples, n_features)): Target relative to X for classification. title (string, optional): Title of the generated plot. Defaults to "KS Statistic Plot". do_cv (bool, optional): If True, the classifier is cross-validated on the dataset using the cross-validation strategy in `cv` to generate the confusion matrix. If False, the confusion matrix is generated without training or cross-validating the classifier. This assumes that the classifier has already been called with its `fit` method beforehand. cv (int, cross-validation generator, iterable, optional): Determines the cross-validation strategy to be used for splitting. Possible inputs for cv are: - None, to use the default 3-fold cross-validation, - integer, to specify the number of folds. - An object to be used as a cross-validation generator. - An iterable yielding train/test splits. For integer/None inputs, if ``y`` is binary or multiclass, :class:`StratifiedKFold` used. If the estimator is not a classifier or if ``y`` is neither binary nor multiclass, :class:`KFold` is used. shuffle (bool, optional): Used when do_cv is set to True. Determines whether to shuffle the training data before splitting using cross-validation. Default set to True. random_state (int :class:`RandomState`): Pseudo-random number generator state used for random sampling. ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot the learning curve. If None, the plot is drawn on a new set of axes. Returns: ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn. Example: >>> lr = classifier_factory(LogisticRegression()) >>> lr.plot_ks_statistic(X, y, random_state=1) <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490> >>> plt.show() .. image:: _static/examples/plot_ks_statistic.png :align: center :alt: KS Statistic """ y = np.array(y) if not hasattr(clf, 'predict_proba'): raise TypeError('"predict_proba" method not in classifier. Cannot calculate ROC Curve.') if not do_cv: probas = clf.predict_proba(X) y_true = y else: if cv is None: cv = StratifiedKFold(shuffle=shuffle, random_state=random_state) elif isinstance(cv, int): cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=random_state) else: pass clf_clone = clone(clf) preds_list = [] trues_list = [] for train_index, test_index in cv.split(X, y): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] clf_clone.fit(X_train, y_train) preds = clf_clone.predict_proba(X_test) preds_list.append(preds) trues_list.append(y_test) probas = np.concatenate(preds_list, axis=0) y_true = np.concatenate(trues_list) ax = plotters.plot_ks_statistic(y_true, probas, title=title, ax=ax) return ax
def plot_ks_statistic(self): # not working skplt.plot_ks_statistic(self.y_test, self.y_prob) plt.show()
def plot_ks_statistic_with_cv(clf, X, y, title='KS Statistic Plot', do_cv=True, cv=None, shuffle=True, random_state=None, ax=None, figsize=None, title_fontsize="large", text_fontsize="medium"): """Generates the KS Statistic plot for a given classifier and dataset. Args: clf: Classifier instance that implements "fit" and "predict_proba" methods. X (array-like, shape (n_samples, n_features)): Training vector, where n_samples is the number of samples and n_features is the number of features. y (array-like, shape (n_samples) or (n_samples, n_features)): Target relative to X for classification. title (string, optional): Title of the generated plot. Defaults to "KS Statistic Plot". do_cv (bool, optional): If True, the classifier is cross-validated on the dataset using the cross-validation strategy in `cv` to generate the confusion matrix. If False, the confusion matrix is generated without training or cross-validating the classifier. This assumes that the classifier has already been called with its `fit` method beforehand. cv (int, cross-validation generator, iterable, optional): Determines the cross-validation strategy to be used for splitting. Possible inputs for cv are: - None, to use the default 3-fold cross-validation, - integer, to specify the number of folds. - An object to be used as a cross-validation generator. - An iterable yielding train/test splits. For integer/None inputs, if ``y`` is binary or multiclass, :class:`StratifiedKFold` used. If the estimator is not a classifier or if ``y`` is neither binary nor multiclass, :class:`KFold` is used. shuffle (bool, optional): Used when do_cv is set to True. Determines whether to shuffle the training data before splitting using cross-validation. Default set to True. random_state (int :class:`RandomState`): Pseudo-random number generator state used for random sampling. ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to plot the learning curve. If None, the plot is drawn on a new set of axes. figsize (2-tuple, optional): Tuple denoting figure size of the plot e.g. (6, 6). Defaults to ``None``. title_fontsize (string or int, optional): Matplotlib-style fontsizes. Use e.g. "small", "medium", "large" or integer-values. Defaults to "large". text_fontsize (string or int, optional): Matplotlib-style fontsizes. Use e.g. "small", "medium", "large" or integer-values. Defaults to "medium". Returns: ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was drawn. Example: >>> lr = classifier_factory(LogisticRegression()) >>> lr.plot_ks_statistic(X, y, random_state=1) <matplotlib.axes._subplots.AxesSubplot object at 0x7fe967d64490> >>> plt.show() .. image:: _static/examples/plot_ks_statistic.png :align: center :alt: KS Statistic """ y = np.array(y) if not hasattr(clf, 'predict_proba'): raise TypeError('"predict_proba" method not in classifier. ' 'Cannot calculate ROC Curve.') if not do_cv: probas = clf.predict_proba(X) y_true = y else: if cv is None: cv = StratifiedKFold(shuffle=shuffle, random_state=random_state) elif isinstance(cv, int): cv = StratifiedKFold(n_splits=cv, shuffle=shuffle, random_state=random_state) else: pass clf_clone = clone(clf) preds_list = [] trues_list = [] for train_index, test_index in cv.split(X, y): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] clf_clone.fit(X_train, y_train) preds = clf_clone.predict_proba(X_test) preds_list.append(preds) trues_list.append(y_test) probas = np.concatenate(preds_list, axis=0) y_true = np.concatenate(trues_list) ax = plotters.plot_ks_statistic(y_true, probas, title=title, ax=ax, figsize=figsize, title_fontsize=title_fontsize, text_fontsize=text_fontsize) return ax