from typing import Any, Dict, List

from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.pipeline import Pipeline
from skgarden import MondrianForestClassifier


def train_classifier(training_data_: List[Dict[str, Any]],
                     perform_feature_selection: bool = False
                     ) -> MondrianForestClassifier:
    X = []
    y = []
    for point in training_data_:
        # Keep every value except the metadata keys as the feature vector.
        X.append(tuple(
            v for k, v in point.items()
            if k not in ['linked', 'issue', 'pr', 'commit']))
        y.append(1 if point['linked'] else -1)
    if perform_feature_selection:
        # RFE is itself a transformer, so it serves as the selection step
        # directly; wrapping it in SelectFromModel fails because RFE exposes
        # neither coef_ nor feature_importances_.
        clf_ = Pipeline([
            ('feature_selection',
             RFE(RandomForestClassifier(n_estimators=128,
                                        class_weight='balanced_subsample'),
                 n_features_to_select=5, step=1)),
            ('classification', MondrianForestClassifier(n_estimators=16)),
        ])
        # sklearn Pipelines do not implement partial_fit, so the
        # feature-selection variant is trained with a single fit call.
        clf_.fit(X, y)
    else:
        clf_ = MondrianForestClassifier(n_estimators=16)
        clf_.partial_fit(X, y)
    return clf_
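# Hedged usage sketch for train_classifier above. The dict schema is inferred
# from the function body (feature keys plus the 'linked'/'issue'/'pr'/'commit'
# metadata keys that get dropped); the records and feature names below are
# hypothetical, not taken from the original project.
toy_training_data = [
    {'issue': 'I-1', 'pr': 'P-1', 'commit': 'c0ffee', 'linked': True,
     'cosine_sim': 0.91, 'time_delta': 3.0},
    {'issue': 'I-2', 'pr': 'P-9', 'commit': 'deadbf', 'linked': False,
     'cosine_sim': 0.12, 'time_delta': 40.0},
]
clf = train_classifier(toy_training_data)
print(clf.predict([(0.88, 2.0)]))  # 1 for linked, -1 for not linked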
def test_partial_fit_equivalence():
    X, y = make_regression(random_state=0, n_samples=100)
    mfr = MondrianForestRegressor(random_state=0)
    mfr.partial_fit(X, y)
    for batch_size in [10, 20, 25, 50, 90]:
        check_partial_fit_equivalence(batch_size, mfr, 0, X, y)

    X, y = make_classification(random_state=0, n_samples=100)
    mfc = MondrianForestClassifier(random_state=0)
    mfc.partial_fit(X, y)
    for batch_size in [10, 20, 25, 50, 90]:
        check_partial_fit_equivalence(batch_size, mfc, 0, X, y, is_clf=True)
def test_proba_classif_convergence():
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.6, test_size=0.4)
    mfc = MondrianForestClassifier(random_state=0)
    mfc.fit(X_train, y_train)

    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)
    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)

    proba = mfc.predict_proba(X_train)
    labels = mfc.predict(X_train)
    assert_array_equal(proba, y_bin)
    assert_array_equal(labels, lb.inverse_transform(y_bin))

    # For points completely far away from the training data, this
    # should converge to the empirical distribution of labels.
    X_inf = np.vstack((30.0 * np.ones(X_train.shape[1]),
                       -30.0 * np.ones(X_train.shape[1])))
    inf_proba = mfc.predict_proba(X_inf)
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(inf_proba, [emp_proba, emp_proba], 3)
def test_fit_after_partial_fit():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    y = np.floor(rng.randn(10))
    mfr = MondrianForestRegressor(random_state=0)
    check_fit_after_partial_fit(mfr, X, y)

    mfc = MondrianForestClassifier(random_state=0)
    check_fit_after_partial_fit(mfc, X, y)
def test_forest_attributes():
    mr = MondrianForestRegressor(n_estimators=5, random_state=0)
    mr.fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_false(hasattr(mr, "classes_"))
    assert_false(hasattr(mr, "n_classes_"))

    mr.partial_fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_false(hasattr(mr, "classes_"))
    assert_false(hasattr(mr, "n_classes_"))

    mc = MondrianForestClassifier(n_estimators=5, random_state=0)
    mc.fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_true(hasattr(mc, "classes_"))
    assert_true(hasattr(mc, "n_classes_"))

    mc = MondrianForestClassifier(n_estimators=5, random_state=0)
    mc.partial_fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_true(hasattr(mc, "classes_"))
    assert_true(hasattr(mc, "n_classes_"))
def test_min_samples_split():
    X_c, y_c = load_digits(return_X_y=True)
    X_r, y_r = make_regression(n_samples=10000, random_state=0)

    for mss in [2, 4, 10, 20]:
        mfr = MondrianForestRegressor(random_state=0, min_samples_split=mss)
        mfr.partial_fit(X_r[:X_r.shape[0] // 2], y_r[:X_r.shape[0] // 2])
        mfr.partial_fit(X_r[X_r.shape[0] // 2:], y_r[X_r.shape[0] // 2:])
        for est in mfr.estimators_:
            n_node_samples = est.tree_.n_node_samples[
                est.tree_.children_left != -1]
            assert_greater(np.min(n_node_samples) + 1, mss)

        mfc = MondrianForestClassifier(random_state=0, min_samples_split=mss)
        mfc.partial_fit(X_c[:X_c.shape[0] // 2], y_c[:X_c.shape[0] // 2])
        mfc.partial_fit(X_c[X_c.shape[0] // 2:], y_c[X_c.shape[0] // 2:])
        for est in mfc.estimators_:
            n_node_samples = est.tree_.n_node_samples[
                est.tree_.children_left != -1]
            assert_greater(np.min(n_node_samples) + 1, mss)
def test_proba_classif_convergence():
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.6, test_size=0.4)
    mfc = MondrianForestClassifier(random_state=0)
    mfc.fit(X_train, y_train)
    check_proba_classif_convergence(mfc, X_train, y_train)

    mfc.partial_fit(X_train, y_train)
    check_proba_classif_convergence(mfc, X_train, y_train)
def get_classifiers_online(n_classes, random_state=42):
    use_aggregations = [True]
    n_estimatorss = [10]
    split_pures = [False]
    dirichlets = [None]
    learning_rates = [0.1]
    for (n_estimators, use_aggregation, split_pure, dirichlet) in product(
            n_estimatorss, use_aggregations, split_pures, dirichlets):
        yield (
            # "AMF(nt=%s, ag=%s, sp=%s, di=%s)"
            # % (
            #     str(n_estimators),
            #     str(use_aggregation),
            #     str(split_pure),
            #     str(dirichlet),
            # ),
            "AMF",
            AMFClassifier(
                n_classes=n_classes,
                random_state=random_state,
                use_aggregation=use_aggregation,
                n_estimators=n_estimators,
                split_pure=split_pure,
                dirichlet=dirichlet,
                verbose=False,
            ),
        )

    yield "Dummy", OnlineDummyClassifier(n_classes=n_classes)

    for n_estimators in n_estimatorss:
        yield (
            "MF",
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        )

    for learning_rate in learning_rates:
        yield (
            # "SGD(%s)" % str(learning_rate),
            "SGD",
            SGDClassifier(
                loss="log",
                learning_rate="constant",
                eta0=learning_rate,
                random_state=random_state,
            ),
        )
def experiment_mf():
    """Runs experiments for Mondrian Forest"""
    mf_l = []
    train_time_l = []
    test_time_l = []
    v_m_l = []
    s_m_l = []
    mf = MondrianForestClassifier(n_estimators=10)
    for i in range(500):
        X_t = X_r[i * 100: (i + 1) * 100]
        y_t = y_r[i * 100: (i + 1) * 100]

        # Train the model
        start_time = time.perf_counter()
        mf.partial_fit(X_t, y_t)
        end_time = time.perf_counter()
        train_time_l.append(end_time - start_time)

        # Test the model
        start_time = time.perf_counter()
        mf_l.append(prediction(mf))
        end_time = time.perf_counter()
        test_time_l.append(end_time - start_time)

        # Check memory (percent of virtual and swap memory in use)
        v_m = psutil.virtual_memory()[2]
        v_m_l.append(v_m)
        s_m = psutil.swap_memory()[3]
        s_m_l.append(s_m)

    # Turn per-batch train times into cumulative training time
    for i in range(1, 500):
        train_time_l[i] += train_time_l[i - 1]
    return mf_l, train_time_l, test_time_l, v_m_l, s_m_l
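# experiment_mf above depends on module-level globals (X_r, y_r) and a
# `prediction` helper that this snippet does not define. A minimal stand-in,
# assuming the helper returns accuracy on a fixed held-out split
# (X_test, y_test) -- an assumption, not the project's actual definition:
def prediction(model):
    return model.score(X_test, y_test)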
def get_classifiers_n_trees_comparison(n_classes, random_state=42):
    use_aggregations = [True]
    n_estimatorss = [1, 2, 5, 10, 20, 50]
    split_pures = [False]
    dirichlets = [None]
    for (n_estimators, use_aggregation, split_pure, dirichlet) in product(
            n_estimatorss, use_aggregations, split_pures, dirichlets):
        yield (
            "AMF(nt=%s)" % str(n_estimators),
            AMFClassifier(
                n_classes=n_classes,
                random_state=random_state,
                use_aggregation=use_aggregation,
                n_estimators=n_estimators,
                split_pure=split_pure,
                dirichlet=dirichlet,
                verbose=False,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            "MF(nt=%s)" % str(n_estimators),
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        )

    for n_estimators in n_estimatorss:
        yield (
            "RF(nt=%s)" % str(n_estimators),
            RandomForestClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            "ET(nt=%s)" % str(n_estimators),
            ExtraTreesClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )
def test_probability_values():
    from skgarden import MondrianForestClassifier
    from sklearn.datasets import load_iris
    import numpy as np

    iris = load_iris()
    mfc = MondrianForestClassifier().fit(iris['data'], iris['target'])
    assert_false(
        np.max(mfc.predict_proba(iris['data'])) > 1.0,
        "Probabilities larger than 1.0 in the predictions!")

    mfc_boot = MondrianForestClassifier(bootstrap=True).fit(
        iris['data'], iris['target'])
    assert_false(
        np.max(mfc_boot.predict_proba(iris['data'])) > 1.0,
        "Probabilities larger than 1.0 in the predictions!")
def get_classifiers():
    return [
        (
            "AMF",
            AMFClassifier(
                n_classes=2,
                n_estimators=n_estimators,
                random_state=random_state,
                use_aggregation=True,
                split_pure=True,
            ),
        ),
        (
            "AMF(no agg)",
            AMFClassifier(
                n_classes=2,
                n_estimators=n_estimators,
                random_state=random_state,
                use_aggregation=False,
                split_pure=True,
            ),
        ),
        (
            "MF",
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        ),
        (
            "RF",
            RandomForestClassifier(n_estimators=n_estimators,
                                   random_state=random_state),
        ),
        (
            "ET",
            ExtraTreesClassifier(n_estimators=n_estimators,
                                 random_state=random_state),
        ),
    ]
def check_partial_fit_equivalence(size_batch, f, random_state, X, y,
                                  is_clf=False):
    start_ptr = list(range(0, 100, size_batch))
    end_ptr = start_ptr[1:] + [100]
    if not is_clf:
        p_f = MondrianForestRegressor(random_state=random_state)
    else:
        p_f = MondrianForestClassifier(random_state=random_state)
    for start, end in zip(start_ptr, end_ptr):
        p_f.partial_fit(X[start:end], y[start:end])
    for est, p_est in zip(f.estimators_, p_f.estimators_):
        assert_array_equal(p_est.tree_.n_node_samples,
                           est.tree_.n_node_samples)
        assert_array_equal(p_est.tree_.threshold, est.tree_.threshold)
        assert_array_equal(p_est.tree_.feature, est.tree_.feature)
        assert_equal(p_est.tree_.root, est.tree_.root)
        assert_array_equal(p_est.tree_.value, est.tree_.value)
        assert_equal(est.tree_.n_node_samples[est.tree_.root], 100)
        assert_equal(p_est.tree_.n_node_samples[est.tree_.root], 100)
from skgarden import MondrianForestClassifier, MondrianForestRegressor

train_test_split.__test__ = False

boston = load_boston()
# The time of split and feature chosen for splitting are highly
# scale-sensitive.
scaler = MinMaxScaler()
X, y = boston.data, boston.target
y = np.round(y)
X = scaler.fit_transform(X)

ensembles = [
    MondrianForestRegressor(random_state=0),
    MondrianForestClassifier(random_state=0)]


def check_boston(est):
    score = est.score(X, y)
    assert_greater(score, 0.94, "Failed with score = %f" % score)


def test_boston():
    mr = MondrianForestRegressor(n_estimators=5, random_state=0)
    mr.fit(X, y)
    check_boston(mr)
    mr.partial_fit(X, y)
    check_boston(mr)
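# Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2.
# On newer versions the same data can be pulled from OpenML instead; a
# sketch (requires network access, and the column order may differ):
from sklearn.datasets import fetch_openml
boston = fetch_openml(name="boston", version=1, as_frame=False)
X, y = boston.data, boston.target.astype(float)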
from datasets import readers as all_readers

readers = all_readers
X, y, dataset_name = readers[13](path)
n_samples, n_features = X.shape
n_classes = int(y.max() + 1)
n_trees = 1

X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=.3, random_state=123)

mf = MondrianForestClassifier(n_estimators=n_trees)
mf.partial_fit(X_train, y_train, classes=np.arange(n_classes))

# mf.apply(X_test).max(axis=1).max()
# mf_paths, mf_est_inds = mf.weighted_decision_path(X_test)
# mf_paths.shape, mf_est_inds.shape
# i = 0
# mf_paths[:, mf_est_inds[i]: mf_est_inds[i + 1]]

of1 = OnlineForestClassifier(n_classes=n_classes, seed=123,
                             use_aggregation=True, n_trees=n_trees,
                             dirichlet=0.5, step=1.,
                             use_feature_importances=False)
clf_dict = {
    "AdaBoost": AdaBoostClassifier(base_estimator=None,
                                   n_estimators=100,
                                   learning_rate=1.0,
                                   random_state=random_state),
    "XGBoost": xgb.XGBClassifier(booster="gbtree",
                                 n_estimators=100,
                                 random_state=random_state,
                                 n_jobs=-1),
    "mondrian_forest": MondrianForestClassifier(n_estimators=10,
                                                min_samples_split=2,
                                                bootstrap=False,
                                                n_jobs=-1,
                                                random_state=random_state,
                                                verbose=0),
    "random_forest": RandomForestClassifier(n_estimators=100,
                                            criterion="gini",
                                            min_samples_split=2,
                                            bootstrap=True,
                                            n_jobs=-1,
                                            random_state=random_state),
    "1layer_NN": MLPClassifier(hidden_layer_sizes=(100,),
                               activation="relu",
                               solver="adam",
                               random_state=random_state),
}
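# Hedged usage sketch for clf_dict above: fit every model on a small
# synthetic problem and print training accuracy. make_classification and
# this loop are illustrative additions, not part of the original script.
from sklearn.datasets import make_classification
X_toy, y_toy = make_classification(n_samples=200, random_state=0)
for name, clf in clf_dict.items():
    clf.fit(X_toy, y_toy)
    print(name, clf.score(X_toy, y_toy))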
def experiment(angle, classifiers, n_xor, n_rxor, n_test):
    """Perform the XOR, RXOR (rotated XOR), XOR streaming experiment."""
    X_xor, y_xor = generate_gaussian_parity(n_xor)
    X_rxor, y_rxor = generate_gaussian_parity(n_rxor, angle_params=angle)
    X_xor_2, y_xor_2 = generate_gaussian_parity(n_xor)
    test_x_xor, test_y_xor = generate_gaussian_parity(n_test)
    test_x_rxor, test_y_rxor = generate_gaussian_parity(n_test,
                                                        angle_params=angle)
    X_stream = np.concatenate((X_xor, X_rxor, X_xor_2), axis=0)
    y_stream = np.concatenate((y_xor, y_rxor, y_xor_2), axis=0)

    # Instantiate classifiers
    if classifiers[0] == 1:
        ht = tree.HoeffdingTreeClassifier(grace_period=2,
                                          split_confidence=1e-01)
    if classifiers[1] == 1:
        mf = MondrianForestClassifier(n_estimators=10)
    if classifiers[2] == 1:
        sdt = DecisionTreeClassifier()
    if classifiers[3] == 1:
        sdf = StreamDecisionForest()
    if classifiers[4] == 1:
        synf = LifelongClassificationForest(default_n_estimators=10)

    errors = np.zeros((10, int(X_stream.shape[0] / 25)))
    for i in range(int(X_stream.shape[0] / 25)):
        X = X_stream[i * 25:(i + 1) * 25]
        y = y_stream[i * 25:(i + 1) * 25]

        # Hoeffding Tree Classifier
        if classifiers[0] == 1:
            ht_partial_fit(ht, X, y)
            ht_xor_y_hat, ht_rxor_y_hat = ht_predict(ht, test_x_xor,
                                                     test_x_rxor)
            errors[0, i] = 1 - np.mean(ht_xor_y_hat == test_y_xor)
            errors[1, i] = 1 - np.mean(ht_rxor_y_hat == test_y_rxor)

        # Mondrian Forest Classifier
        if classifiers[1] == 1:
            mf.partial_fit(X, y)
            mf_xor_y_hat = mf.predict(test_x_xor)
            mf_rxor_y_hat = mf.predict(test_x_rxor)
            errors[2, i] = 1 - np.mean(mf_xor_y_hat == test_y_xor)
            errors[3, i] = 1 - np.mean(mf_rxor_y_hat == test_y_rxor)

        # Stream Decision Tree Classifier
        if classifiers[2] == 1:
            sdt.partial_fit(X, y, classes=[0, 1])
            sdt_xor_y_hat = sdt.predict(test_x_xor)
            sdt_rxor_y_hat = sdt.predict(test_x_rxor)
            errors[4, i] = 1 - np.mean(sdt_xor_y_hat == test_y_xor)
            errors[5, i] = 1 - np.mean(sdt_rxor_y_hat == test_y_rxor)

        # Stream Decision Forest Classifier
        if classifiers[3] == 1:
            sdf.partial_fit(X, y, classes=[0, 1])
            sdf_xor_y_hat = sdf.predict(test_x_xor)
            sdf_rxor_y_hat = sdf.predict(test_x_rxor)
            errors[6, i] = 1 - np.mean(sdf_xor_y_hat == test_y_xor)
            errors[7, i] = 1 - np.mean(sdf_rxor_y_hat == test_y_rxor)

        # Synergistic Forest Classifier
        if classifiers[4] == 1:
            if i == 0:
                synf.add_task(X, y, n_estimators=10, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i < (n_xor / 25):
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i == (n_xor / 25):
                synf.add_task(X, y, n_estimators=10, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (2 * n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)

            if i < (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
            else:
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
                errors[9, i] = 1 - np.mean(synf_rxor_y_hat == test_y_rxor)

    return errors
linearly_separable = (X, y)

datasets = [
    make_moons(n_samples=n_samples, noise=0.3, random_state=0),
    make_circles(n_samples=n_samples, noise=0.2, factor=0.5,
                 random_state=random_state),
    linearly_separable
]

n_trees = 10

classifiers = [
    ('OMAF', OnlineForestClassifier(n_classes=n_classes, n_trees=n_trees,
                                    seed=123, use_aggregation=True,
                                    split_pure=True, memory=512)),
    ('MF', MondrianForestClassifier(n_estimators=n_trees)),
    ('RF', RandomForestClassifier(n_estimators=n_trees)),
    ('ET', ExtraTreesClassifier(n_estimators=n_trees))
]

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=.4, random_state=42)

plot_decision_classification(classifiers, datasets)
plt.savefig('decisions.pdf')
logging.info("Saved the decision functions in 'decisions.pdf'")
def run_method_on_dataset(method, dataset, n_iter, n_batches, n_estimators,
                          max_depth):
    mean_fit_time = []
    mean_train_acc = []
    mean_test_acc = []
    for i in range(n_iter):
        if method == 'classical_full_data':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_refit(
                clf, dataset, n_batches)
        elif method == 'classical_window_1':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(
                clf, dataset, n_batches, h=1)
        elif method == 'classical_window_3':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(
                clf, dataset, n_batches, h=3)
        elif method == 'classical_window_5':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(
                clf, dataset, n_batches, h=5)
        elif method == 'classical_increment_frac_0.2':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.2)
        elif method == 'classical_increment_frac_0.5':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.5)
        elif method == 'extratrees_full_data':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_refit(
                clf, dataset, n_batches)
        elif method == 'extratrees_window_1':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(
                clf, dataset, n_batches, h=1)
        elif method == 'extratrees_window_3':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(
                clf, dataset, n_batches, h=3)
        elif method == 'extratrees_window_5':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth, n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(
                clf, dataset, n_batches, h=5)
        elif method == 'extratrees_increment_frac_0.2':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth, warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.2)
        elif method == 'extratrees_increment_frac_0.5':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth, warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.5)
        elif method == 'mondrian_skgarden':
            clf = MondrianForestClassifier(n_estimators=n_estimators,
                                           max_depth=max_depth)
            fit_time, train_acc, test_acc = mondrian_rf_skgarden(
                clf, dataset, n_batches)
        elif method == 'mondrian':
            clf = OurMondrianForestClassifier(n_estimators=n_estimators,
                                              budget=max_depth)
            fit_time, train_acc, test_acc = mondrian_rf_our(
                clf, dataset, n_batches)
        mean_fit_time.append(fit_time)
        mean_train_acc.append(train_acc)
        mean_test_acc.append(test_acc)
    mean_fit_time = np.mean(mean_fit_time, axis=0)
    mean_train_acc = np.mean(mean_train_acc, axis=0)
    mean_test_acc = np.mean(mean_test_acc, axis=0)
    return mean_fit_time, mean_train_acc, mean_test_acc
def __init__(self, rf_estimators=15, rf_max_depth=2, rf_n_jobs=-1):
    super(MondorianForest, self).__init__(name="Mondorian Forest")
    self.model = MondrianForestClassifier()
    # level must be a logging level constant, not the logging.info function
    logging.basicConfig(filename=path + "reports\\" + "mf.log",
                        level=logging.INFO)
    logging.info("Mondorian Forest Log created")
def get_classifiers_batch(n_classes, random_state=42):
    use_aggregations = [True]
    n_estimatorss = [10]
    split_pures = [False]
    dirichlets = [None]
    learning_rates = [1e-1]
    for (n_estimators, use_aggregation, split_pure, dirichlet) in product(
            n_estimatorss, use_aggregations, split_pures, dirichlets):
        yield (
            # "AMF(nt=%s, ag=%s, sp=%s, di=%s)"
            # % (
            #     str(n_estimators),
            #     str(use_aggregation),
            #     str(split_pure),
            #     str(dirichlet),
            # ),
            "AMF",
            AMFClassifier(
                n_classes=n_classes,
                random_state=random_state,
                use_aggregation=use_aggregation,
                n_estimators=n_estimators,
                split_pure=split_pure,
                dirichlet=dirichlet,
                verbose=False,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            # "MF(nt=%s)" % str(n_estimators),
            "MF",
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        )

    for n_estimators in n_estimatorss:
        yield (
            # "RF(nt=%s)" % str(n_estimators),
            "RF",
            RandomForestClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            # "ET(nt=%s)" % str(n_estimators),
            "ET",
            ExtraTreesClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )

    for learning_rate in learning_rates:
        yield (
            # "SGD(%s)" % str(learning_rate),
            "SGD",
            SGDClassifier(
                loss="log",
                learning_rate="constant",
                eta0=learning_rate,
                random_state=random_state,
            ),
        )
    dirichlet=dirichlet,
    # n_samples_increment=,
    step=step,
    verbose=False,
)

ofc = OnlineForestClassifier(
    n_classes=n_classes,
    random_state=random_state,
    use_aggregation=use_aggregation,
    n_estimators=n_estimators,
    split_pure=split_pure,
    dirichlet=dirichlet,
    step=step,
    verbose=False,
)

mfc = MondrianForestClassifier(n_estimators=n_estimators,
                               random_state=random_state)

logging.info("Fitting AMF...")
t1 = time()
amf.partial_fit(X_train, y_train)
t2 = time()
logging.info("Done. time fit AMF: " + "%.2f" % (t2 - t1) + " seconds")

logging.info("Fitting OFC...")
t1 = time()
ofc.partial_fit(X_train, y_train)
t2 = time()
logging.info("Done. time fit OFC: " + "%.2f" % (t2 - t1) + " seconds")

logging.info("Fitting MFC...")
t1 = time()
def get_mf_decision(n_estimators):
    clf = MondrianForestClassifier(n_estimators=n_estimators,
                                   random_state=random_state)
    clf.partial_fit(X, y)
    # Probability of the positive class over the mesh, reshaped to the grid
    zz = clf.predict_proba(X_mesh)[:, 1].reshape(xx.shape)
    return zz
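# Hedged usage sketch for get_mf_decision above: it reads the globals X, y,
# X_mesh, xx and random_state. One plausible mesh setup consistent with the
# function body (dataset choice, grid bounds and step are illustrative):
import numpy as np
from sklearn.datasets import make_moons

random_state = 42
X, y = make_moons(n_samples=200, noise=0.25, random_state=random_state)
xx, yy = np.meshgrid(np.arange(-2.0, 3.0, 0.02),
                     np.arange(-1.5, 2.0, 0.02))
X_mesh = np.c_[xx.ravel(), yy.ravel()]
zz = get_mf_decision(n_estimators=10)  # P(class 1) at each grid point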