def test_rfe(): generator = check_random_state(0) iris = load_iris() X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))] X_sparse = sparse.csr_matrix(X) y = iris.target # dense model clf = SVC(kernel="linear") rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1) rfe.fit(X, y) X_r = rfe.transform(X) clf.fit(X_r, y) assert len(rfe.ranking_) == X.shape[1] # sparse model clf_sparse = SVC(kernel="linear") rfe_sparse = RFE(estimator=clf_sparse, n_features_to_select=4, step=0.1) rfe_sparse.fit(X_sparse, y) X_r_sparse = rfe_sparse.transform(X_sparse) assert X_r.shape == iris.data.shape assert_array_almost_equal(X_r[:10], iris.data[:10]) assert_array_almost_equal(rfe.predict(X), clf.predict(iris.data)) assert rfe.score(X, y) == clf.score(iris.data, iris.target) assert_array_almost_equal(X_r, X_r_sparse.toarray())
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv) grid.fit(X, y) print("The best parameters are %s with a score of %0.2f" % (grid.best_params_, grid.best_score_)) # Now we need to fit a classifier for all parameters in the 2d version # (we use a smaller set of parameters here because it takes a while to train) C_2d_range = [1e-2, 1, 1e2] gamma_2d_range = [1e-1, 1, 1e1] classifiers = [] for C in C_2d_range: for gamma in gamma_2d_range: clf = SVC(C=C, gamma=gamma) clf.fit(X_2d, y_2d) classifiers.append((C, gamma, clf)) # ############################################################################# # Visualization # # draw visualization of parameter effects plt.figure(figsize=(8, 6)) xx, yy = np.meshgrid(np.linspace(-3, 3, 200), np.linspace(-3, 3, 200)) for (k, (C, gamma, clf)) in enumerate(classifiers): # evaluate decision function in a grid Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) Z = Z.reshape(xx.shape) # visualize decision function for these parameters
iris = datasets.load_iris() X = iris.data[:, [0, 2]] y = iris.target # Training classifiers clf1 = DecisionTreeClassifier(max_depth=4) clf2 = KNeighborsClassifier(n_neighbors=7) clf3 = SVC(gamma=.1, kernel='rbf', probability=True) eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)], voting='soft', weights=[2, 1, 2]) clf1.fit(X, y) clf2.fit(X, y) clf3.fit(X, y) eclf.fit(X, y) # Plotting decision regions x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1 y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1 xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1)) f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8)) for idx, clf, tt in zip( product([0, 1], [0, 1]), [clf1, clf2, clf3, eclf], ['Decision Tree (depth=4)', 'KNN (k=7)', 'Kernel SVM', 'Soft Voting']): Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
# ------------------------- # First, we load the wine dataset and convert it to a binary classification # problem. Then, we train a support vector classifier on a training dataset. import matplotlib.pyplot as plt from mrex.svm import SVC from mrex.ensemble import RandomForestClassifier from mrex.metrics import plot_roc_curve from mrex.datasets import load_wine from mrex.model_selection import train_test_split X, y = load_wine(return_X_y=True) y = y == 2 X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) svc = SVC(random_state=42) svc.fit(X_train, y_train) ############################################################################## # Plotting the ROC Curve # ---------------------- # Next, we plot the ROC curve with a single call to # :func:`mrex.metrics.plot_roc_curve`. The returned `svc_disp` object allows # us to continue using the already computed ROC curve for the SVC in future # plots. svc_disp = plot_roc_curve(svc, X_test, y_test) plt.show() ############################################################################## # Training a Random Forest and Plotting the ROC Curve # -------------------------------------------------------- # We train a random forest classifier and create a plot comparing it to the SVC