Example #1
def train_classifier(training_data_: List[Dict[str, Any]], perform_feature_selection: bool = False) \
        -> MondrianForestClassifier:
    X = list()
    y = list()
    for point in training_data_:
        X.append(
            tuple([
                v for k, v in point.items()
                if k not in ['linked', 'issue', 'pr', 'commit']
            ]))
        y.append(1 if point['linked'] else -1)
    if perform_feature_selection:
        clf_ = Pipeline([
            ('feature_selection',
             SelectFromModel(
                 RFE(RandomForestClassifier(n_estimators=128,
                                            class_weight='balanced_subsample'),
                     n_features_to_select=5,
                     step=1))),
            ('classification', MondrianForestClassifier(n_estimators=16))
        ])
        # sklearn Pipelines do not expose partial_fit, so fit in one shot here.
        clf_.fit(X, y)
    else:
        clf_ = MondrianForestClassifier(n_estimators=16)
        clf_.partial_fit(X, y)
    return clf_
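A hedged usage sketch follows; the record layout and feature names are assumptions for illustration, not taken from the original project.

# Hypothetical input: the metadata keys ('linked', 'issue', 'pr', 'commit')
# are stripped above; every remaining key is treated as a numeric feature.
sample_data = [
    {'linked': True, 'issue': 1, 'pr': 2, 'commit': 0, 'f1': 0.3, 'f2': 12.0},
    {'linked': False, 'issue': 3, 'pr': 4, 'commit': 1, 'f1': 0.9, 'f2': 7.0},
    {'linked': True, 'issue': 5, 'pr': 6, 'commit': 2, 'f1': 0.2, 'f2': 11.5},
    {'linked': False, 'issue': 7, 'pr': 8, 'commit': 3, 'f1': 0.8, 'f2': 6.5},
]
clf = train_classifier(sample_data)
print(clf.predict([(0.5, 10.0)]))  # -> 1 (linked) or -1 (not linked)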
Example #2
def test_partial_fit_equivalence():
    X, y = make_regression(random_state=0, n_samples=100)
    mfr = MondrianForestRegressor(random_state=0)
    mfr.partial_fit(X, y)
    for batch_size in [10, 20, 25, 50, 90]:
        check_partial_fit_equivalence(batch_size, mfr, 0, X, y)

    X, y = make_classification(random_state=0, n_samples=100)
    mtc = MondrianForestClassifier(random_state=0)
    mtc.partial_fit(X, y)
    for batch_size in [10, 20, 25, 50, 90]:
        check_partial_fit_equivalence(batch_size, mtc, 0, X, y, is_clf=True)
Example #3
def test_proba_classif_convergence():
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.6,
                                                        test_size=0.4)
    mfc = MondrianForestClassifier(random_state=0)
    mfc.fit(X_train, y_train)

    lb = LabelBinarizer()
    y_bin = lb.fit_transform(y_train)
    le = LabelEncoder()
    y_enc = le.fit_transform(y_train)

    proba = mfc.predict_proba(X_train)
    labels = mfc.predict(X_train)
    assert_array_equal(proba, y_bin)
    assert_array_equal(labels, lb.inverse_transform(y_bin))

    # For points completely far away from the training data, this
    # should converge to the empirical distribution of labels.
    X_inf = np.vstack(
        (30.0 * np.ones(X_train.shape[1]), -30.0 * np.ones(X_train.shape[1])))
    inf_proba = mfc.predict_proba(X_inf)
    emp_proba = np.bincount(y_enc) / float(len(y_enc))
    assert_array_almost_equal(inf_proba, [emp_proba, emp_proba], 3)
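The X and y used here are module-level globals the snippet does not show. A minimal stand-in (an assumption, purely to make the test runnable; the exact-equality assertions rely on the forest fully separating its training points, which may not hold for arbitrary data) could be:

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=300, n_informative=5, random_state=0)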
Example #4
def test_fit_after_partial_fit():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 5)
    y = np.floor(rng.randn(10))
    mfr = MondrianForestRegressor(random_state=0)
    check_fit_after_partial_fit(mfr, X, y)

    mfc = MondrianForestClassifier(random_state=0)
    check_fit_after_partial_fit(mfc, X, y)
Example #5
def test_forest_attributes():
    mr = MondrianForestRegressor(n_estimators=5, random_state=0)
    mr.fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_false(hasattr(mr, "classes_"))
    assert_false(hasattr(mr, "n_classes_"))

    mr.partial_fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_false(hasattr(mr, "classes_"))
    assert_false(hasattr(mr, "n_classes_"))

    mr = MondrianForestClassifier(n_estimators=5, random_state=0)
    mr.fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_true(hasattr(mr, "classes_"))
    assert_true(hasattr(mr, "n_classes_"))

    mr = MondrianForestClassifier(n_estimators=5, random_state=0)
    mr.partial_fit([[1, 2, 3], [4, 5, 6]], [1, 2])
    assert_true(hasattr(mr, "classes_"))
    assert_true(hasattr(mr, "n_classes_"))
Example #6
def test_min_samples_split():
    X_c, y_c = load_digits(return_X_y=True)
    X_r, y_r = make_regression(n_samples=10000, random_state=0)

    for mss in [2, 4, 10, 20]:
        mfr = MondrianForestRegressor(random_state=0, min_samples_split=mss)
        mfr.partial_fit(X_r[:X_r.shape[0] // 2], y_r[:X_r.shape[0] // 2])
        mfr.partial_fit(X_r[X_r.shape[0] // 2:], y_r[X_r.shape[0] // 2:])
        for est in mfr.estimators_:
            n_node_samples = est.tree_.n_node_samples[
                est.tree_.children_left != -1]
            assert_greater(np.min(n_node_samples) + 1, mss)

        mfc = MondrianForestClassifier(random_state=0, min_samples_split=mss)
        mfc.partial_fit(X_c[:X_c.shape[0] // 2], y_c[:X_c.shape[0] // 2])
        mfc.partial_fit(X_c[X_c.shape[0] // 2:], y_c[X_c.shape[0] // 2:])
        for est in mfc.estimators_:
            n_node_samples = est.tree_.n_node_samples[
                est.tree_.children_left != -1]
            assert_greater(np.min(n_node_samples) + 1, mss)
Example #7
def test_proba_classif_convergence():
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=0.6, test_size=0.4)
    mfc = MondrianForestClassifier(random_state=0)
    mfc.fit(X_train, y_train)
    check_proba_classif_convergence(mfc, X_train, y_train)
    mfc.partial_fit(X_train, y_train)
    check_proba_classif_convergence(mfc, X_train, y_train)
Example #8
def get_classifiers_online(n_classes, random_state=42):
    use_aggregations = [True]
    n_estimatorss = [10]
    split_pures = [False]
    dirichlets = [None]
    learning_rates = [0.1]

    for (n_estimators, use_aggregation, split_pure,
         dirichlet) in product(n_estimatorss, use_aggregations, split_pures,
                               dirichlets):
        yield (
            "AMF",
            AMFClassifier(
                n_classes=n_classes,
                random_state=random_state,
                use_aggregation=use_aggregation,
                n_estimators=n_estimators,
                split_pure=split_pure,
                dirichlet=dirichlet,
                verbose=False,
            ),
        )

    yield "Dummy", OnlineDummyClassifier(n_classes=n_classes)

    for n_estimators in n_estimatorss:
        yield (
            "MF",
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        )

    for learning_rate in learning_rates:
        yield (
            # "SGD(%s)" % str(learning_rate),
            "SGD",
            SGDClassifier(
                loss="log",
                learning_rate="constant",
                eta0=learning_rate,
                random_state=random_state,
            ),
        )
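A hypothetical driver for this generator; the dataset and batch size are illustrative. SGDClassifier requires the full class list on its first partial_fit call, hence the keyword attempt with a fallback for models that take no classes argument.

import numpy as np
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, random_state=0)
classes = np.unique(y)
for name, clf in get_classifiers_online(n_classes=len(classes)):
    for start in range(0, len(X), 50):
        Xb, yb = X[start:start + 50], y[start:start + 50]
        try:
            clf.partial_fit(Xb, yb, classes=classes)
        except TypeError:
            # e.g. AMFClassifier takes only (X, y)
            clf.partial_fit(Xb, yb)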
Example #9
File: cifar10.py  Project: PSSF23/SPDT
def experiment_mf():
    """Runs experiments for Mondrian Forest"""
    mf_l = []
    train_time_l = []
    test_time_l = []
    v_m_l = []
    s_m_l = []

    mf = MondrianForestClassifier(n_estimators=10)

    for i in range(500):
        X_t = X_r[i * 100 : (i + 1) * 100]
        y_t = y_r[i * 100 : (i + 1) * 100]

        # Train the model
        start_time = time.perf_counter()
        mf.partial_fit(X_t, y_t)
        end_time = time.perf_counter()
        train_time_l.append(end_time - start_time)

        # Test the model
        start_time = time.perf_counter()
        mf_l.append(prediction(mf))
        end_time = time.perf_counter()
        test_time_l.append(end_time - start_time)

        # Check memory (percent fields of virtual and swap memory)
        v_m = psutil.virtual_memory().percent
        v_m_l.append(v_m)
        s_m = psutil.swap_memory().percent
        s_m_l.append(s_m)

    # Convert per-batch train times into cumulative totals
    for i in range(1, 500):
        train_time_l[i] += train_time_l[i - 1]

    return mf_l, train_time_l, test_time_l, v_m_l, s_m_l
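The prediction helper and the X_r/y_r arrays are project-specific and not shown here. A plausible stand-in (hypothetical, not the project's actual code) scores the current model on a fixed held-out set:

def prediction(model):
    # Accuracy on a held-out test set; X_test and y_test are assumed globals.
    return np.mean(model.predict(X_test) == y_test)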
Example #10
def get_classifiers_n_trees_comparison(n_classes, random_state=42):
    use_aggregations = [True]
    n_estimatorss = [1, 2, 5, 10, 20, 50]
    split_pures = [False]
    dirichlets = [None]
    for (n_estimators, use_aggregation, split_pure,
         dirichlet) in product(n_estimatorss, use_aggregations, split_pures,
                               dirichlets):
        yield (
            "AMF(nt=%s)" % str(n_estimators),
            AMFClassifier(
                n_classes=n_classes,
                random_state=random_state,
                use_aggregation=use_aggregation,
                n_estimators=n_estimators,
                split_pure=split_pure,
                dirichlet=dirichlet,
                verbose=False,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            "MF(nt=%s)" % str(n_estimators),
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        )

    for n_estimators in n_estimatorss:
        yield (
            "RF(nt=%s)" % str(n_estimators),
            RandomForestClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            "ET(nt=%s)" % str(n_estimators),
            ExtraTreesClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )
Example #11
def test_probability_values():
    from skgarden import MondrianForestClassifier
    from sklearn.datasets import load_iris
    import numpy as np

    iris = load_iris()

    mfc = MondrianForestClassifier().fit(iris['data'], iris['target'])
    assert_false(
        np.max(mfc.predict_proba(iris['data'])) > 1.0,
        "Probabilities larger than 1.0 in the predictions!")

    mfc_boot = MondrianForestClassifier(bootstrap=True).fit(
        iris['data'], iris['target'])
    assert_false(
        np.max(mfc_boot.predict_proba(iris['data'])) > 1.0,
        "Probabilities larger than 1.0 in the predictions!")
Example #12
def get_classifiers():
    return [
        (
            "AMF",
            AMFClassifier(
                n_classes=2,
                n_estimators=n_estimators,
                random_state=random_state,
                use_aggregation=True,
                split_pure=True,
            ),
        ),
        (
            "AMF(no agg)",
            AMFClassifier(
                n_classes=2,
                n_estimators=n_estimators,
                random_state=random_state,
                use_aggregation=False,
                split_pure=True,
            ),
        ),
        (
            "MF",
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        ),
        (
            "RF",
            RandomForestClassifier(n_estimators=n_estimators,
                                   random_state=random_state),
        ),
        (
            "ET",
            ExtraTreesClassifier(n_estimators=n_estimators,
                                 random_state=random_state),
        ),
    ]
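get_classifiers closes over module-level n_estimators and random_state, which the snippet omits; illustrative values (assumptions) would be:

n_estimators = 10
random_state = 42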
Example #13
def check_partial_fit_equivalence(size_batch,
                                  f,
                                  random_state,
                                  X,
                                  y,
                                  is_clf=False):
    start_ptr = list(range(0, 100, size_batch))
    end_ptr = start_ptr[1:] + [100]
    if not is_clf:
        p_f = MondrianForestRegressor(random_state=random_state)
    else:
        p_f = MondrianForestClassifier(random_state=random_state)
    for start, end in zip(start_ptr, end_ptr):
        p_f.partial_fit(X[start:end], y[start:end])
    for est, p_est in zip(f.estimators_, p_f.estimators_):
        assert_array_equal(p_est.tree_.n_node_samples,
                           est.tree_.n_node_samples)
        assert_array_equal(p_est.tree_.threshold, est.tree_.threshold)
        assert_array_equal(p_est.tree_.feature, est.tree_.feature)
        assert_equal(p_est.tree_.root, est.tree_.root)
        assert_array_equal(p_est.tree_.value, est.tree_.value)
        assert_equal(est.tree_.n_node_samples[est.tree_.root], 100)
        assert_equal(p_est.tree_.n_node_samples[est.tree_.root], 100)
Example #14
from skgarden import MondrianForestClassifier, MondrianForestRegressor

train_test_split.__test__ = False

boston = load_boston()
# The time of split and feature chosen for splitting are highly
# scale-sensitive.
scaler = MinMaxScaler()
X, y = boston.data, boston.target

y = np.round(y)
X = scaler.fit_transform(X)

ensembles = [
    MondrianForestRegressor(random_state=0),
    MondrianForestClassifier(random_state=0)]


def check_boston(est):
    score = est.score(X, y)
    assert_greater(score, 0.94, "Failed with score = %f" % score)


def test_boston():
    mr = MondrianForestRegressor(n_estimators=5, random_state=0)
    mr.fit(X, y)
    check_boston(mr)
    mr.partial_fit(X, y)
    check_boston(mr)
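Note that load_boston was removed in scikit-learn 1.2. On newer versions a replacement data source is needed; a sketch (keeping the scaling step, though the 0.94 threshold was tuned for the Boston data and may not carry over):

from sklearn.datasets import fetch_california_housing

X, y = fetch_california_housing(return_X_y=True)
y = np.round(y)
X = MinMaxScaler().fit_transform(X)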

Example #15
from datasets import readers as all_readers

readers = all_readers

X, y, dataset_name = readers[13](path)

n_samples, n_features = X.shape
n_classes = int(y.max() + 1)
n_trees = 1

X_train, X_test, \
    y_train, y_test = train_test_split(X, y, stratify=y,
                                       test_size=.3, random_state=123)

mf = MondrianForestClassifier(n_estimators=n_trees)
mf.partial_fit(X_train, y_train, classes=np.arange(n_classes))

# mf.apply(X_test).max(axis=1).max()
# mf_paths, mf_est_inds = mf.weighted_decision_path(X_test)
# mf_paths.shape, mf_est_inds.shape
# i = 0
# mf_paths[:, mf_est_inds[i]: mf_est_inds[i + 1]]

of1 = OnlineForestClassifier(n_classes=n_classes,
                             seed=123,
                             use_aggregation=True,
                             n_trees=n_trees,
                             dirichlet=0.5,
                             step=1.,
                             use_feature_importances=False)
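A hypothetical follow-up, not part of the original snippet, scoring the Mondrian forest on the held-out split:

print("MF accuracy: %.3f" % mf.score(X_test, y_test))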
Example #16
    clf_dict = {
        "AdaBoost":
        AdaBoostClassifier(base_estimator=None,
                           n_estimators=100,
                           learning_rate=1.0,
                           random_state=random_state),
        "XGBoost":
        xgb.XGBClassifier(booster="gbtree",
                          n_estimators=100,
                          random_state=random_state,
                          n_jobs=-1),
        "mondrian_forest":
        MondrianForestClassifier(n_estimators=10,
                                 min_samples_split=2,
                                 bootstrap=False,
                                 n_jobs=-1,
                                 random_state=random_state,
                                 verbose=0),
        "random_forest":
        RandomForestClassifier(n_estimators=100,
                               criterion="gini",
                               min_samples_split=2,
                               bootstrap=True,
                               n_jobs=-1,
                               random_state=random_state),
        "1layer_NN":
        MLPClassifier(hidden_layer_sizes=(100, ),
                      activation="relu",
                      solver="adam",
                      random_state=random_state)
    }
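A hedged sketch of how such a dictionary is typically consumed; the X_train/X_test names are assumptions, not part of the original fragment:

    results = {}
    for name, clf in clf_dict.items():
        clf.fit(X_train, y_train)
        results[name] = clf.score(X_test, y_test)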
Example #17
def experiment(angle, classifiers, n_xor, n_rxor, n_test):
    """Perform XOR RXOR(XNOR) XOR experiment"""
    X_xor, y_xor = generate_gaussian_parity(n_xor)
    X_rxor, y_rxor = generate_gaussian_parity(n_rxor, angle_params=angle)
    X_xor_2, y_xor_2 = generate_gaussian_parity(n_xor)
    test_x_xor, test_y_xor = generate_gaussian_parity(n_test)
    test_x_rxor, test_y_rxor = generate_gaussian_parity(n_test,
                                                        angle_params=angle)
    X_stream = np.concatenate((X_xor, X_rxor, X_xor_2), axis=0)
    y_stream = np.concatenate((y_xor, y_rxor, y_xor_2), axis=0)

    # Instantiate classifiers
    if classifiers[0] == 1:
        ht = tree.HoeffdingTreeClassifier(grace_period=2,
                                          split_confidence=1e-01)
    if classifiers[1] == 1:
        mf = MondrianForestClassifier(n_estimators=10)
    if classifiers[2] == 1:
        sdt = DecisionTreeClassifier()
    if classifiers[3] == 1:
        sdf = StreamDecisionForest()
    if classifiers[4] == 1:
        synf = LifelongClassificationForest(default_n_estimators=10)

    errors = np.zeros((10, int(X_stream.shape[0] / 25)))

    for i in range(int(X_stream.shape[0] / 25)):
        X = X_stream[i * 25:(i + 1) * 25]
        y = y_stream[i * 25:(i + 1) * 25]

        # Hoeffding Tree Classifier
        if classifiers[0] == 1:
            ht_partial_fit(ht, X, y)
            ht_xor_y_hat, ht_rxor_y_hat = ht_predict(ht, test_x_xor,
                                                     test_x_rxor)
            errors[0, i] = 1 - np.mean(ht_xor_y_hat == test_y_xor)
            errors[1, i] = 1 - np.mean(ht_rxor_y_hat == test_y_rxor)

        # Mondrian Forest Classifier
        if classifiers[1] == 1:
            mf.partial_fit(X, y)
            mf_xor_y_hat = mf.predict(test_x_xor)
            mf_rxor_y_hat = mf.predict(test_x_rxor)
            errors[2, i] = 1 - np.mean(mf_xor_y_hat == test_y_xor)
            errors[3, i] = 1 - np.mean(mf_rxor_y_hat == test_y_rxor)

        # Stream Decision Tree Classifier
        if classifiers[2] == 1:
            sdt.partial_fit(X, y, classes=[0, 1])
            sdt_xor_y_hat = sdt.predict(test_x_xor)
            sdt_rxor_y_hat = sdt.predict(test_x_rxor)
            errors[4, i] = 1 - np.mean(sdt_xor_y_hat == test_y_xor)
            errors[5, i] = 1 - np.mean(sdt_rxor_y_hat == test_y_rxor)

        # Stream Decision Forest Classifier
        if classifiers[3] == 1:
            sdf.partial_fit(X, y, classes=[0, 1])
            sdf_xor_y_hat = sdf.predict(test_x_xor)
            sdf_rxor_y_hat = sdf.predict(test_x_rxor)
            errors[6, i] = 1 - np.mean(sdf_xor_y_hat == test_y_xor)
            errors[7, i] = 1 - np.mean(sdf_rxor_y_hat == test_y_rxor)

        # Synergistic Forest Classifier
        if classifiers[4] == 1:
            if i == 0:
                synf.add_task(X, y, n_estimators=10, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i < (n_xor / 25):
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
            elif i == (n_xor / 25):
                synf.add_task(X, y, n_estimators=10, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=1)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)
            elif i < (2 * n_xor + n_rxor) / 25:
                synf.update_task(X, y, task_id=0)
                synf_xor_y_hat = synf.predict(test_x_xor, task_id=0)
                synf_rxor_y_hat = synf.predict(test_x_rxor, task_id=1)

            if i < (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
            if i >= (n_xor / 25):
                errors[8, i] = 1 - np.mean(synf_xor_y_hat == test_y_xor)
                errors[9, i] = 1 - np.mean(synf_rxor_y_hat == test_y_rxor)

    return errors
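The ht_partial_fit and ht_predict helpers are project-specific and omitted; given the river-style constructor above (learn_one/predict_one with dict features), plausible stand-ins (hypothetical) are:

def ht_partial_fit(model, X, y):
    # Hoeffding trees in river learn one sample at a time from dict features.
    for xi, yi in zip(X, y):
        model.learn_one(dict(enumerate(xi)), yi)


def ht_predict(model, X1, X2):
    def predict(X):
        return np.array([model.predict_one(dict(enumerate(xi))) for xi in X])
    return predict(X1), predict(X2)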
Example #18
linearly_separable = (X, y)

datasets = [
    make_moons(n_samples=n_samples, noise=0.3, random_state=0),
    make_circles(n_samples=n_samples,
                 noise=0.2,
                 factor=0.5,
                 random_state=random_state),
    linearly_separable,
]

n_trees = 10

classifiers = [('OMAF',
                OnlineForestClassifier(n_classes=n_classes,
                                       n_trees=n_trees,
                                       seed=123,
                                       use_aggregation=True,
                                       split_pure=True,
                                       memory=512)),
               ('MF', MondrianForestClassifier(n_estimators=n_trees)),
               ('RF', RandomForestClassifier(n_estimators=n_trees)),
               ('ET', ExtraTreesClassifier(n_estimators=n_trees))]

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=.4, random_state=42)

plot_decision_classification(classifiers, datasets)

logging.info("Saved the decision functions in 'decision.pdf")
plt.savefig('decisions.pdf')
Example #19
def run_method_on_dataset(method, dataset, n_iter, n_batches, n_estimators,
                          max_depth):
    mean_fit_time = []
    mean_train_acc = []
    mean_test_acc = []

    for i in range(n_iter):

        if method == 'classical_full_data':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_refit(
                clf, dataset, n_batches)

        elif method == 'classical_window_1':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(clf,
                                                                dataset,
                                                                n_batches,
                                                                h=1)

        elif method == 'classical_window_3':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(clf,
                                                                dataset,
                                                                n_batches,
                                                                h=3)

        elif method == 'classical_window_5':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(clf,
                                                                dataset,
                                                                n_batches,
                                                                h=5)

        elif method == 'classical_increment_frac_0.2':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.2)

        elif method == 'classical_increment_frac_0.5':
            clf = RandomForestClassifier(n_estimators=n_estimators,
                                         max_depth=max_depth,
                                         warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.5)

        elif method == 'extratrees_full_data':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,
                                       n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_refit(
                clf, dataset, n_batches)

        elif method == 'extratrees_window_1':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,
                                       n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(clf,
                                                                dataset,
                                                                n_batches,
                                                                h=1)

        elif method == 'extratrees_window_3':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,
                                       n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(clf,
                                                                dataset,
                                                                n_batches,
                                                                h=3)

        elif method == 'extratrees_window_5':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,
                                       n_jobs=-1)
            fit_time, train_acc, test_acc = classical_rf_window(clf,
                                                                dataset,
                                                                n_batches,
                                                                h=5)

        elif method == 'extratrees_increment_frac_0.2':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,
                                       warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.2)

        elif method == 'extratrees_increment_frac_0.5':
            clf = ExtraTreesClassifier(n_estimators=n_estimators,
                                       max_depth=max_depth,
                                       warm_start=True)
            fit_time, train_acc, test_acc = classical_rf_incremental(
                clf, dataset, n_batches, new_frac=0.5)

        elif method == 'mondrian_skgarden':
            clf = MondrianForestClassifier(n_estimators=n_estimators,
                                           max_depth=max_depth)
            fit_time, train_acc, test_acc = mondrian_rf_skgarden(
                clf, dataset, n_batches)

        elif method == 'mondrian':
            clf = OurMondrianForestClassifier(n_estimators=n_estimators,
                                              budget=max_depth)
            fit_time, train_acc, test_acc = mondrian_rf_our(
                clf, dataset, n_batches)

        else:
            raise ValueError("unknown method: %s" % method)

        mean_fit_time.append(fit_time)
        mean_train_acc.append(train_acc)
        mean_test_acc.append(test_acc)

    mean_fit_time = np.mean(mean_fit_time, axis=0)
    mean_train_acc = np.mean(mean_train_acc, axis=0)
    mean_test_acc = np.mean(mean_test_acc, axis=0)

    return mean_fit_time, mean_train_acc, mean_test_acc
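The chain above repeats the same three patterns per estimator family. A table-driven sketch (an alternative, reusing the original helper functions and leaving the two mondrian branches as they are) could replace most of it:

def build_and_run(method, dataset, n_batches, n_estimators, max_depth):
    family = (ExtraTreesClassifier if method.startswith('extratrees')
              else RandomForestClassifier)
    if method.endswith('full_data'):
        clf = family(n_estimators=n_estimators, max_depth=max_depth, n_jobs=-1)
        return classical_rf_refit(clf, dataset, n_batches)
    if '_window_' in method:
        clf = family(n_estimators=n_estimators, max_depth=max_depth, n_jobs=-1)
        return classical_rf_window(clf, dataset, n_batches,
                                   h=int(method.rsplit('_', 1)[1]))
    if '_increment_frac_' in method:
        clf = family(n_estimators=n_estimators, max_depth=max_depth,
                     warm_start=True)
        return classical_rf_incremental(clf, dataset, n_batches,
                                        new_frac=float(method.rsplit('_', 1)[1]))
    raise ValueError("unknown method: %s" % method)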
Example #20
    def __init__(self, rf_estimators=15, rf_max_depth=2, rf_n_jobs=-1):
        super(MondorianForest, self).__init__(name="Mondorian Forest")
        # Pass the constructor arguments through instead of silently ignoring them.
        self.model = MondrianForestClassifier(n_estimators=rf_estimators,
                                              max_depth=rf_max_depth,
                                              n_jobs=rf_n_jobs)
        # basicConfig expects a log level constant, not the logging.info function.
        logging.basicConfig(filename=path + "reports\\" + "mf.log",
                            level=logging.INFO)
        logging.info("Mondorian Forest Log created")
Example #21
def get_classifiers_batch(n_classes, random_state=42):
    use_aggregations = [True]
    n_estimatorss = [10]
    split_pures = [False]
    dirichlets = [None]
    learning_rates = [1e-1]

    for (n_estimators, use_aggregation, split_pure,
         dirichlet) in product(n_estimatorss, use_aggregations, split_pures,
                               dirichlets):
        yield (
            "AMF",
            AMFClassifier(
                n_classes=n_classes,
                random_state=random_state,
                use_aggregation=use_aggregation,
                n_estimators=n_estimators,
                split_pure=split_pure,
                dirichlet=dirichlet,
                verbose=False,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            # "MF(nt=%s)" % str(n_estimators),
            "MF",
            MondrianForestClassifier(n_estimators=n_estimators,
                                     random_state=random_state),
        )

    for n_estimators in n_estimatorss:
        yield (
            # "RF(nt=%s)" % str(n_estimators),
            "RF",
            RandomForestClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )

    for n_estimators in n_estimatorss:
        yield (
            # "ET(nt=%s)" % str(n_estimators),
            "ET",
            ExtraTreesClassifier(
                n_estimators=n_estimators,
                class_weight=None,
                random_state=random_state,
                n_jobs=1,
            ),
        )

    for learning_rate in learning_rates:
        yield (
            # "SGD(%s)" % str(learning_rate),
            "SGD",
            SGDClassifier(
                loss="log",
                learning_rate="constant",
                eta0=learning_rate,
                random_state=random_state,
            ),
        )
Example #22
    amf = AMFClassifier(
        n_classes=n_classes,
        random_state=random_state,
        use_aggregation=use_aggregation,
        n_estimators=n_estimators,
        split_pure=split_pure,
        dirichlet=dirichlet,
        # n_samples_increment=,
        step=step,
        verbose=False,
    )
    ofc = OnlineForestClassifier(
        n_classes=n_classes,
        random_state=random_state,
        use_aggregation=use_aggregation,
        n_estimators=n_estimators,
        split_pure=split_pure,
        dirichlet=dirichlet,
        step=step,
        verbose=False,
    )
    mfc = MondrianForestClassifier(n_estimators=n_estimators,
                                   random_state=random_state)

    logging.info("Fitting AMF...")
    t1 = time()
    amf.partial_fit(X_train, y_train)
    t2 = time()
    logging.info("Done. time fit AMF: " + "%.2f" % (t2 - t1) + " seconds")

    logging.info("Fitting OFC...")
    t1 = time()
    ofc.partial_fit(X_train, y_train)
    t2 = time()
    logging.info("Done. time fit OFC:" + "%.2f" % (t2 - t1) + " seconds")

    logging.info("Fitting MFC...")
    t1 = time()
def get_mf_decision(n_estimators):
    clf = MondrianForestClassifier(n_estimators=n_estimators,
                                   random_state=random_state)
    clf.partial_fit(X, y)
    zz = clf.predict_proba(X_mesh)[:, 1].reshape(xx.shape)
    return zz
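get_mf_decision relies on module-level X, y, random_state, xx and X_mesh; a minimal mesh setup (values chosen for illustration) would be:

import numpy as np

xx, yy = np.meshgrid(np.linspace(-2.0, 2.0, 200), np.linspace(-2.0, 2.0, 200))
X_mesh = np.c_[xx.ravel(), yy.ravel()]
zz = get_mf_decision(n_estimators=10)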