def __init__(self, n_estimators=100, max_features="auto", max_depth=6,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_jobs=-1, random_state=None, random_seed=0, **kwargs):
    parameters = {
        "n_estimators": n_estimators,
        "max_features": max_features,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_weight_fraction_leaf": min_weight_fraction_leaf,
        "n_jobs": n_jobs,
    }
    parameters.update(kwargs)
    # `random_state` is deprecated in favor of `random_seed`; map the old
    # argument onto the new one before building the underlying estimator.
    random_seed = deprecate_arg("random_state", "random_seed", random_state, random_seed)
    et_classifier = SKExtraTreesClassifier(random_state=random_seed, **parameters)
    super().__init__(parameters=parameters,
                     component_obj=et_classifier,
                     random_seed=random_seed)
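A minimal usage sketch for the constructor above, assuming the wrapper exposes the fit/predict/feature_importance interface exercised by the tests below; the toy data here is hypothetical.

import numpy as np

X = np.random.rand(50, 4)             # hypothetical toy features
y = np.random.randint(0, 2, size=50)  # hypothetical binary labels

clf = ExtraTreesClassifier(n_estimators=10, max_depth=3, random_seed=0)
clf.fit(X, y)
print(clf.predict(X)[:5])
print(clf.feature_importance)         # compared against scikit-learn's below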
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier as SKExtraTreesClassifier


def test_feature_importance(X_y_binary):
    X, y = X_y_binary
    clf = ExtraTreesClassifier(n_jobs=1)
    sk_clf = SKExtraTreesClassifier(max_depth=6, random_state=0, n_jobs=1)
    sk_clf.fit(X, y)
    sk_feature_importance = sk_clf.feature_importances_

    clf.fit(X, y)
    feature_importance = clf.feature_importance

    np.testing.assert_almost_equal(sk_feature_importance, feature_importance, decimal=5)
def test_fit_predict_binary(X_y_binary):
    X, y = X_y_binary
    sk_clf = SKExtraTreesClassifier(max_depth=6, random_state=0)
    sk_clf.fit(X, y)
    y_pred_sk = sk_clf.predict(X)
    y_pred_proba_sk = sk_clf.predict_proba(X)

    clf = ExtraTreesClassifier()
    clf.fit(X, y)
    y_pred = clf.predict(X)
    y_pred_proba = clf.predict_proba(X)

    np.testing.assert_almost_equal(y_pred, y_pred_sk, decimal=5)
    np.testing.assert_almost_equal(y_pred_proba, y_pred_proba_sk, decimal=5)
def test_fit_predict_multi(X_y_multi):
    X, y = X_y_multi
    sk_clf = SKExtraTreesClassifier(max_depth=6, random_state=0)
    sk_clf.fit(X, y)
    y_pred_sk = sk_clf.predict(X)
    y_pred_proba_sk = sk_clf.predict_proba(X)

    clf = ExtraTreesClassifier()
    fitted = clf.fit(X, y)
    assert isinstance(fitted, ExtraTreesClassifier)

    y_pred = clf.predict(X)
    y_pred_proba = clf.predict_proba(X)

    np.testing.assert_almost_equal(y_pred, y_pred_sk, decimal=5)
    np.testing.assert_almost_equal(y_pred_proba, y_pred_proba_sk, decimal=5)
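The constructor also routes the deprecated random_state argument through deprecate_arg onto random_seed; a hedged test sketch of that mapping (this test is not in the original fragments):

def test_random_state_maps_to_random_seed(X_y_binary):
    # Hypothetical test: both spellings should resolve to the same seed,
    # so the two fitted models should make identical predictions.
    X, y = X_y_binary
    old = ExtraTreesClassifier(random_state=42, n_jobs=1)
    new = ExtraTreesClassifier(random_seed=42, n_jobs=1)
    old.fit(X, y)
    new.fit(X, y)
    np.testing.assert_almost_equal(old.predict(X), new.predict(X), decimal=5)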
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier as SKExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_breast_cancer, load_iris, load_wine

from extra_trees.ensemble.forest import ExtraTreesClassifier

# prepare models
classification_models = [
    ('Logistic', LogisticRegression()),
    ('Nearest Neighbors', KNeighborsClassifier()),
    ('SVM', SVC()),
    ('DecisionTree', DecisionTreeClassifier()),
    ('RandomForest', RandomForestClassifier()),
    ('ExtraTrees (SciKit)', SKExtraTreesClassifier()),
    ('ExtraTrees', ExtraTreesClassifier()),
]

seed = 7

print("breast_cancer")
breast_cancer = load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

# evaluate each model in turn
results = []
names = []
scoring = 'accuracy'
for name, model in classification_models:
    # shuffle=True is required when passing random_state to KFold
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X, y, cv=kfold,
                                                 scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # summarize each model's cross-validated accuracy
    print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))
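load_iris and load_wine are imported but the fragment stops after the breast-cancer run; a hedged continuation repeating the same evaluation for the other two imported datasets:

for dataset_name, loader in [("iris", load_iris), ("wine", load_wine)]:
    print(dataset_name)
    data = loader()
    X, y = data.data, data.target
    for name, model in classification_models:
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
        cv_results = model_selection.cross_val_score(model, X, y, cv=kfold,
                                                     scoring=scoring)
        print("%s: %f (%f)" % (name, cv_results.mean(), cv_results.std()))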
"ExtraTrees (SciKit)", "ExtraTrees", ] classifiers = [ KNeighborsClassifier(3), SVC(kernel="linear", C=0.025), SVC(gamma=2, C=1), GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True), MLPClassifier(alpha=1), GaussianNB(), QuadraticDiscriminantAnalysis(), AdaBoostClassifier(), DecisionTreeClassifier(max_depth=5), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=2), SKExtraTreesClassifier(n_estimators=10, max_features=2), ExtraTreesClassifier(n_estimators=10, max_features=2), ] X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=1, n_clusters_per_class=1) rng = np.random.RandomState(2) X += 2 * rng.uniform(size=X.shape) linearly_separable = (X, y) datasets = [ make_moons(noise=0.3, random_state=0), make_circles(noise=0.2, factor=0.5, random_state=1), linearly_separable
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier as SKExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

from extra_trees.ensemble.forest import ExtraTreesClassifier

# the leading arguments of this call are assumed; only the trailing
# n_redundant=2 survives in the original fragment
X, y = make_classification(n_samples=100000, n_features=20, n_informative=2,
                           n_redundant=2)

train_samples = 100  # samples used for training the models
X_train = X[:train_samples]
X_test = X[train_samples:]
y_train = y[:train_samples]
y_test = y[train_samples:]

# Create classifiers
lr = LogisticRegression()
gnb = GaussianNB()
svc = LinearSVC(C=1.0)
dtc = DecisionTreeClassifier(min_samples_split=2)
rfc = RandomForestClassifier(n_estimators=100)
sk_etc = SKExtraTreesClassifier(n_estimators=100)
etc = ExtraTreesClassifier(n_estimators=100)

# #############################################################################
# Plot calibration plots

plt.figure(figsize=(10, 10))
ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
ax2 = plt.subplot2grid((3, 1), (2, 0))

ax1.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
for clf, name in [
        (lr, 'Logistic'),
        (gnb, 'Naive Bayes'),
        (svc, 'Support Vector Classification'),
        (dtc, 'Decision Tree'),
        # the last three pairs are assumed; the fragment ends at Decision Tree
        (rfc, 'Random Forest'),
        (sk_etc, 'ExtraTrees (SciKit)'),
        (etc, 'ExtraTrees')]:
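    # Hedged continuation (not in the original fragment): fit each model and
    # plot its calibration curve, following the standard scikit-learn
    # calibration-comparison recipe.
    clf.fit(X_train, y_train)
    if hasattr(clf, "predict_proba"):
        prob_pos = clf.predict_proba(X_test)[:, 1]
    else:
        # LinearSVC has no predict_proba; rescale its decision function to [0, 1]
        prob_pos = clf.decision_function(X_test)
        prob_pos = (prob_pos - prob_pos.min()) / (prob_pos.max() - prob_pos.min())
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_test, prob_pos, n_bins=10)
    ax1.plot(mean_predicted_value, fraction_of_positives, "s-", label=name)
    ax2.hist(prob_pos, range=(0, 1), bins=10, label=name, histtype="step", lw=2)

ax1.set_ylabel("Fraction of positives")
ax1.legend(loc="lower right")
ax2.set_xlabel("Mean predicted value")
ax2.set_ylabel("Count")
plt.tight_layout()
plt.show()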