def balance_class_balance(path="images/class_balance.png"):
    """
    Draw a ClassBalance plot of the game dataset target and write it to disk.

    Parameters
    ----------
    path : str, default: "images/class_balance.png"
        Location the rendered figure is written to (passed as ``outpath``).

    Returns
    -------
    The value returned by the visualizer's ``show`` call.
    """
    # load_game() is unpacked as an (X, y) pair everywhere else in this file;
    # only the target is needed for a class balance plot.
    _, y = load_game()

    oz = ClassBalance(labels=["draw", "loss", "win"])
    oz.fit(y)

    # ``show`` replaces the deprecated ``poof`` and matches the other
    # generators in this file.
    return oz.show(outpath=path)
def class_prediction_error(ax=None):
    """
    Build a ClassPredictionError visualizer around a Gaussian naive Bayes
    model and hand it, with the one-hot encoded game data, to ``tts_plot``.

    Parameters
    ----------
    ax : optional
        Forwarded unchanged to the ClassPredictionError constructor.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and data.
    """
    features, target = load_game(return_dataset=True).to_numpy()

    # Densify the encoder output with toarray() before plotting.
    encoder = OneHotEncoder()
    encoded = encoder.fit_transform(features).toarray()

    visualizer = ClassPredictionError(GaussianNB(), ax=ax)
    return tts_plot(visualizer, encoded, target)
def rocauc(dataset):
    """
    Fit and score a ROCAUC visualizer on an example dataset and save the plot.

    Parameters
    ----------
    dataset : str
        ``"binary"`` uses ``load_occupancy()`` with a GaussianNB model;
        ``"multiclass"`` uses ``load_game()`` (ordinal-encoded features) with
        a RidgeClassifier model.

    Raises
    ------
    ValueError
        If ``dataset`` is neither ``"binary"`` nor ``"multiclass"``.
    """
    if dataset == "binary":
        X, y = load_occupancy()
        model = GaussianNB()
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        model = RidgeClassifier()
    else:
        # Report the offending value so a bad call is easy to diagnose.
        raise ValueError(
            "unknown dataset '{}': expected 'binary' or 'multiclass'".format(dataset)
        )

    # Hold out 20% of the rows for scoring.
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)

    oz = ROCAUC(model, ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)

    # The image name is suffixed with the dataset kind, e.g. "rocauc_binary".
    savefig(oz, "rocauc_{}".format(dataset))
def validation_curve_classifier_knn(path="images/validation_curve_classifier_knn.png"):
    """
    Render a validation curve for KNeighborsClassifier over ``n_neighbors``
    on the one-hot encoded game data and write the figure to ``path``.

    Parameters
    ----------
    path : str, default: "images/validation_curve_classifier_knn.png"
        Output location passed to the visualizer's ``show`` call.
    """
    features, target = load_game()
    features = OneHotEncoder().fit_transform(features)

    _, axes = plt.subplots()

    # Odd neighbor counts 3, 5, ..., 19 evaluated with 4 stratified folds.
    folds = StratifiedKFold(4)
    neighbor_counts = np.arange(3, 20, 2)

    print("warning: generating the KNN validation curve can take a very long time!")

    curve_options = {
        "ax": axes,
        "param_name": "n_neighbors",
        "param_range": neighbor_counts,
        "cv": folds,
        "scoring": "f1_weighted",
        "n_jobs": 8,
    }
    visualizer = ValidationCurve(KNeighborsClassifier(), **curve_options)
    visualizer.fit(features, target)
    visualizer.show(outpath=path)
def validation_curve_classifier_svc(path="images/validation_curve_classifier_svc.png"):
    """
    Render a validation curve for SVC over ``gamma`` on the one-hot encoded
    game data and write the figure to ``path``.

    Parameters
    ----------
    path : str, default: "images/validation_curve_classifier_svc.png"
        Output location passed to the visualizer's ``show`` call.
    """
    features, target = load_game()
    features = OneHotEncoder().fit_transform(features)

    _, axes = plt.subplots()

    # Twelve log-spaced gamma values from 1e-6 to 1e-1, with 12 stratified
    # folds; the x axis is drawn on a log scale (logx=True).
    folds = StratifiedKFold(12)
    gamma_values = np.logspace(-6, -1, 12)

    print("warning: generating the SVC validation curve can take a very long time!")

    curve_options = {
        "ax": axes,
        "param_name": "gamma",
        "param_range": gamma_values,
        "logx": True,
        "cv": folds,
        "scoring": "f1_weighted",
        "n_jobs": 8,
    }
    visualizer = ValidationCurve(SVC(), **curve_options)
    visualizer.fit(features, target)
    visualizer.show(outpath=path)
def prcurve(dataset):
    """
    Fit and score a PRCurve visualizer on an example dataset and save the plot.

    Parameters
    ----------
    dataset : str
        ``"binary"`` uses ``load_spam()`` with a RidgeClassifier model and no
        extra visualizer options; ``"multiclass"`` uses ``load_game()``
        (ordinal-encoded features, label-encoded target) with a MultinomialNB
        model and per-class curves.

    Raises
    ------
    ValueError
        If ``dataset`` is neither ``"binary"`` nor ``"multiclass"``.
    """
    if dataset == "binary":
        X, y = load_spam()
        model = RidgeClassifier()
        kws = {}
    elif dataset == "multiclass":
        X, y = load_game()
        X = OrdinalEncoder().fit_transform(X)
        y = LabelEncoder().fit_transform(y)
        model = MultinomialNB()
        kws = {
            "per_class": True,
            "iso_f1_curves": True,
            "fill_area": False,
            "micro": False,
        }
    else:
        # Report the offending value so a bad call is easy to diagnose.
        raise ValueError(
            "unknown dataset '{}': expected 'binary' or 'multiclass'".format(dataset)
        )

    # Hold out 20% of the (shuffled) rows for scoring.
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, shuffle=True)

    oz = PRCurve(model, ax=newfig(), **kws)
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)

    # The image name is suffixed with the dataset kind,
    # e.g. "precision_recall_binary".
    savefig(oz, "precision_recall_{}".format(dataset))