def do_test_bootstrap(n_estimators, n_jobs, random_state):
    # 1. Test that all bootstrap samples are different
    clf = ForestClassifier(
        n_estimators=n_estimators, n_jobs=n_jobs, random_state=random_state
    )
    clf.fit(X, y)
    for n_estimator1, n_estimator2 in product(
        range(n_estimators), range(n_estimators)
    ):
        if n_estimator1 < n_estimator2:
            assert clf.trees[n_estimator1]._train_indices != approx(
                clf.trees[n_estimator2]._train_indices
            )
            assert clf.trees[n_estimator1]._valid_indices != approx(
                clf.trees[n_estimator2]._valid_indices
            )

    # 2. Test that random_state makes bootstrap samples identical and that,
    #    when no random_state is used, bootstrap samples are different
    clf1 = ForestClassifier(
        n_estimators=n_estimators, n_jobs=n_jobs, random_state=random_state
    )
    clf1.fit(X, y)
    clf2 = ForestClassifier(
        n_estimators=n_estimators, n_jobs=n_jobs, random_state=random_state
    )
    clf2.fit(X, y)
    for n_estimator in range(n_estimators):
        if random_state is None:
            assert clf1.trees[n_estimator]._train_indices != approx(
                clf2.trees[n_estimator]._train_indices
            )
            assert clf1.trees[n_estimator]._valid_indices != approx(
                clf2.trees[n_estimator]._valid_indices
            )
        else:
            assert clf1.trees[n_estimator]._train_indices == approx(
                clf2.trees[n_estimator]._train_indices
            )
            assert clf1.trees[n_estimator]._valid_indices == approx(
                clf2.trees[n_estimator]._valid_indices
            )

    # 3. Test that the apply() method gives the exact same leaves (this checks
    #    that the trees are the same, namely that random column subsampling is
    #    indeed correctly seeded) and that predictions are the same (or not)
    clf1 = ForestClassifier(
        n_estimators=n_estimators, n_jobs=n_jobs, random_state=random_state
    )
    clf1.fit(X, y)
    clf2 = ForestClassifier(
        n_estimators=n_estimators, n_jobs=n_jobs, random_state=random_state
    )
    clf2.fit(X, y)
    if random_state is None:
        assert clf1.apply(X) != approx(clf2.apply(X))
        assert clf1.predict_proba(X) != approx(clf2.predict_proba(X))
    else:
        assert clf1.apply(X) == approx(clf2.apply(X))
        assert clf1.predict_proba(X) == approx(clf2.predict_proba(X))
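# A hypothetical pytest driver for the helper above (the actual test module
# may parametrize differently): it exercises both the unseeded case, where
# bootstrap samples must differ between fits, and the seeded case, where they
# must match. Assumes `import pytest` at module level.
@pytest.mark.parametrize("n_estimators", [2, 5])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("random_state", [None, 42])
def test_bootstrap(n_estimators, n_jobs, random_state):
    do_test_bootstrap(n_estimators, n_jobs, random_state)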
def test_forest_classifier_serialization(
    dataset_name,
    n_estimators,
    aggregation,
    class_weight,
    dirichlet,
    n_jobs,
    max_features,
    random_state,
    step,
    multiclass,
    cat_split_strategy,
):
    if dataset_name == "adult":
        X, y = load_adult(raw=True)
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=random_state
        )
    elif dataset_name == "iris":
        iris = datasets.load_iris()
        X = iris.data
        y = iris.target
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=1 / 5, random_state=random_state
        )

    clf1 = ForestClassifier(
        n_estimators=n_estimators,
        n_jobs=n_jobs,
        multiclass=multiclass,
        max_bins=37,
        cat_split_strategy=cat_split_strategy,
        aggregation=aggregation,
        max_features=max_features,
        class_weight=class_weight,
        random_state=random_state,
        dirichlet=dirichlet,
        step=step,
    )
    clf1.fit(X_train, y_train)

    # Round-trip the fitted forest through pickle
    filename = "forest_classifier_on_iris.pkl"
    with open(filename, "wb") as f:
        pkl.dump(clf1, f)
    with open(filename, "rb") as f:
        clf2 = pkl.load(f)
    os.remove(filename)

    # The deserialized forest must be identical to the original one
    assert_forests_equal(clf1, clf2, is_classifier=True)

    # It must also produce identical predictions and leaves
    y_pred1 = clf1.predict_proba(X_test)
    y_pred2 = clf2.predict_proba(X_test)
    np.testing.assert_equal(y_pred1, y_pred2)

    y_pred1 = clf1.predict(X_test)
    y_pred2 = clf2.predict(X_test)
    np.testing.assert_equal(y_pred1, y_pred2)

    apply1 = clf1.apply(X_test)
    apply2 = clf2.apply(X_test)
    np.testing.assert_equal(apply1, apply2)
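# Minimal sketch (illustrative, not part of the original suite) of how the
# same pickle round-trip can persist a fitted forest outside the tests. The
# helper names are hypothetical; only `pkl` (pickle) from the imports above
# is used.
def save_forest(clf, path):
    # Serialize a fitted ForestClassifier to disk
    with open(path, "wb") as f:
        pkl.dump(clf, f)


def load_forest(path):
    # Reload the forest; the test above asserts that predict(),
    # predict_proba() and apply() are unchanged by the round trip
    with open(path, "rb") as f:
        return pkl.load(f)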
def test_class_weight_sample_weights(self):
    iris = self.iris
    X, y = iris["data"], iris["target"]

    # Check that passing no sample_weight and passing all sample weights
    # equal to 1.0 give the same forest
    clf1 = ForestClassifier(class_weight=None, random_state=42)
    clf1.fit(X, y)
    clf2 = ForestClassifier(class_weight=None, random_state=42)
    clf2.fit(X, y, sample_weight=np.ones(y.shape[0]))
    assert clf1.apply(X) == approx(clf2.apply(X))
    assert clf1.predict_proba(X) == approx(clf2.predict_proba(X))

    # Check that class_weight="balanced" matches the sample weights computed
    # by compute_sample_weight("balanced", y)
    clf1 = ForestClassifier(class_weight="balanced", random_state=42)
    clf1.fit(X, y)
    clf2 = ForestClassifier(class_weight=None, random_state=42)
    sample_weight = compute_sample_weight("balanced", y)
    clf2.fit(X, y, sample_weight=sample_weight)
    assert clf1.apply(X) == approx(clf2.apply(X))
    assert clf1.predict_proba(X) == approx(clf2.predict_proba(X))

    # Simulate unbalanced data from the iris dataset
    X_unb = np.concatenate((X[0:50], X[50:56], X[100:106]), axis=0)
    y_unb = np.concatenate((y[0:50], y[50:56], y[100:106]), axis=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X_unb, y_unb, shuffle=True, stratify=y_unb, random_state=42, test_size=0.5
    )
    clf = ForestClassifier(class_weight=None, random_state=42, aggregation=True)
    clf.fit(X_train, y_train)
    y_scores = clf.predict(X_test)
    report1 = classification_report(y_test, y_scores, output_dict=True)

    clf = ForestClassifier(class_weight="balanced", random_state=42, aggregation=True)
    clf.fit(X_train, y_train)
    y_scores = clf.predict(X_test)
    report2 = classification_report(y_test, y_scores, output_dict=True)

    # In the considered case, class_weight should improve all metrics
    for label in ["0", "1", "2"]:
        label_report1 = report1[label]
        label_report2 = report2[label]
        assert label_report2["precision"] >= label_report1["precision"]
        assert label_report2["recall"] >= label_report1["recall"]
        assert label_report2["f1-score"] >= label_report1["f1-score"]

    # Same check on an unbalanced version of the breast_cancer dataset
    breast_cancer = self.breast_cancer
    X, y = breast_cancer["data"], breast_cancer["target"]
    idx_0 = y == 0
    idx_1 = y == 1
    X_unb = np.concatenate((X[idx_0], X[idx_1][:10]), axis=0)
    y_unb = np.concatenate((y[idx_0], y[idx_1][:10]), axis=0)
    X_train, X_test, y_train, y_test = train_test_split(
        X_unb, y_unb, shuffle=True, stratify=y_unb, random_state=42, test_size=0.5
    )
    clf = ForestClassifier(class_weight=None, random_state=42, aggregation=True)
    clf.fit(X_train, y_train)
    y_scores = clf.predict(X_test)
    y_test_binary = LabelBinarizer().fit_transform(y_test)
    avg_prec1 = average_precision_score(y_test_binary, y_scores, average="weighted")

    clf = ForestClassifier(class_weight="balanced", random_state=42, aggregation=True)
    clf.fit(X_train, y_train)
    y_scores = clf.predict(X_test)
    avg_prec2 = average_precision_score(y_test_binary, y_scores, average="weighted")

    # class_weight="balanced" should improve the weighted average precision
    assert avg_prec2 > avg_prec1
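# Reference sketch (illustrative, not part of the original suite): the
# "balanced" heuristic used above weights each sample by
#     n_samples / (n_classes * count of that sample's class),
# which is what compute_sample_weight("balanced", y) returns. Assumes the
# `np` and `compute_sample_weight` imports already used above.
def _check_balanced_weight_formula():
    y_demo = np.array([0, 0, 0, 0, 1, 1, 2])
    counts = np.bincount(y_demo)
    manual = y_demo.shape[0] / (counts.size * counts[y_demo])
    np.testing.assert_allclose(manual, compute_sample_weight("balanced", y_demo))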
def do_test_bootstrap_again(n_estimators, n_jobs):
    # 4. When bootstrap seeds and column subsampling seeds are the same,
    #    the trees are all the same
    clf = ForestClassifier(n_estimators=n_estimators, n_jobs=n_jobs)

    def _my_generate_random_states(self, n_states=None):
        # All bootstrap seeds and all column subsampling seeds are identical
        self._random_states_bootstrap = np.ones(
            (n_states or clf.n_estimators,), dtype=np.int32
        )
        self._random_states_trees = np.ones(
            (n_states or clf.n_estimators,), dtype=np.int32
        )

    # Monkey-patch the classifier
    clf._generate_random_states = types.MethodType(_my_generate_random_states, clf)
    clf.fit(X, y)
    leaves = clf.apply(X)
    for n_estimator1, n_estimator2 in product(
        range(n_estimators), range(n_estimators)
    ):
        if n_estimator1 < n_estimator2:
            assert clf.trees[n_estimator1]._train_indices == approx(
                clf.trees[n_estimator2]._train_indices
            )
            assert clf.trees[n_estimator1]._valid_indices == approx(
                clf.trees[n_estimator2]._valid_indices
            )
            assert leaves[n_estimator1] == approx(leaves[n_estimator2])

    # 5. When bootstrap seeds are the same but column subsampling seeds are
    #    different, all the trees are different
    clf = ForestClassifier(n_estimators=n_estimators, n_jobs=n_jobs)

    def _my_generate_random_states(self, n_states=None):
        # All bootstrap seeds are the same
        self._random_states_bootstrap = np.ones(
            (n_states or clf.n_estimators,), dtype=np.int32
        )
        # But column subsampling seeds are different
        self._random_states_trees = np.arange(
            n_states or clf.n_estimators, dtype=np.int32
        )

    # Monkey-patch the classifier
    clf._generate_random_states = types.MethodType(_my_generate_random_states, clf)
    clf.fit(X, y)
    leaves = clf.apply(X)
    for n_estimator1, n_estimator2 in product(
        range(n_estimators), range(n_estimators)
    ):
        if n_estimator1 < n_estimator2:
            assert clf.trees[n_estimator1]._train_indices == approx(
                clf.trees[n_estimator2]._train_indices
            )
            assert clf.trees[n_estimator1]._valid_indices == approx(
                clf.trees[n_estimator2]._valid_indices
            )
            assert leaves[n_estimator1] != approx(leaves[n_estimator2])

    # 6. When bootstrap seeds are different but column subsampling seeds are
    #    identical, all the trees are different
    clf = ForestClassifier(n_estimators=n_estimators, n_jobs=n_jobs)

    def _my_generate_random_states(self, n_states=None):
        # All bootstrap seeds are different
        self._random_states_bootstrap = np.arange(
            n_states or clf.n_estimators, dtype=np.int32
        )
        # But column subsampling seeds are the same
        self._random_states_trees = np.ones(
            (n_states or clf.n_estimators,), dtype=np.int32
        )

    # Monkey-patch the classifier
    clf._generate_random_states = types.MethodType(_my_generate_random_states, clf)
    clf.fit(X, y)
    leaves = clf.apply(X)
    for n_estimator1, n_estimator2 in product(
        range(n_estimators), range(n_estimators)
    ):
        if n_estimator1 < n_estimator2:
            assert clf.trees[n_estimator1]._train_indices != approx(
                clf.trees[n_estimator2]._train_indices
            )
            assert clf.trees[n_estimator1]._valid_indices != approx(
                clf.trees[n_estimator2]._valid_indices
            )
            assert leaves[n_estimator1] != approx(leaves[n_estimator2])
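# The monkey-patching pattern above is plain Python: types.MethodType binds a
# free function to one instance, so it shadows the class method for that
# object only. A minimal self-contained illustration (toy class, not from the
# suite):
def _methodtype_demo():
    class Greeter:
        def greet(self):
            return "hello"

    g = Greeter()

    def _patched_greet(self):
        # Replacement behavior, bound to this single instance
        return "hi"

    g.greet = types.MethodType(_patched_greet, g)
    assert g.greet() == "hi"  # the patched instance
    assert Greeter().greet() == "hello"  # the class itself is untouched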