Example #1
def test_model_sample_weight():
    """Run the workflow of deep forest with a local buffer."""

    case_kwargs = copy.deepcopy(kwargs)

    # Training without sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)
    y_pred_no_sample_weight = model.predict(X_test)

    # Training with equal sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.ones(y_train.size)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_equal_sample_weight = model.predict(X_test)

    # Make sure predictions are the same with no sample_weight and with equal sample_weight
    assert_array_equal(y_pred_no_sample_weight, y_pred_equal_sample_weight)

    # Training with skewed sample_weight
    model = CascadeForestClassifier(**case_kwargs)
    sample_weight = np.where(y_train == 0, 0.1, y_train)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    y_pred_skewed_sample_weight = model.predict(X_test)

    # Make sure predictions differ between skewed and equal sample_weight
    assert_raises(AssertionError, assert_array_equal,
                  y_pred_skewed_sample_weight, y_pred_equal_sample_weight)

    model.clean()  # clear the buffer
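The test snippets on this page refer to module-level fixtures (kwargs, X_train, y_train, X_test, y_test, save_dir) and imports that are defined elsewhere in the test module. Below is a minimal sketch of a plausible setup, assuming scikit-learn's digits dataset and deliberately small forest settings to keep the tests fast; the exact values and the train/test split are assumptions, not the library's defaults.

import copy
import shutil

import numpy as np
import pytest
from numpy.testing import assert_array_equal, assert_raises
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

from deepforest import CascadeForestClassifier

# Assumed fixture values -- chosen only to keep the example small and fast
save_dir = "./tmp_deepforest_checkpoint"
kwargs = {
    "n_bins": 10,        # hypothetical settings, not the library defaults
    "max_layers": 2,
    "n_estimators": 1,
    "n_trees": 10,
    "random_state": 0,
}

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# The `backend` argument in the workflow tests is presumably supplied via
# pytest parametrization, e.g.:
# @pytest.mark.parametrize("backend", ["custom", "sklearn"])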
Example #2
def test_model_workflow_in_memory(backend):
    """Run the workflow of deep forest with in-memory mode."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": False})
    case_kwargs.update({"backend": backend})

    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)

    # Test feature_importances_
    if backend == "sklearn":
        model.get_layer_feature_importances(0)
    else:
        with pytest.raises(RuntimeError) as excinfo:
            model.get_layer_feature_importances(0)
        assert "Please use the sklearn backend" in str(excinfo.value)

    # Predictions before saving
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier(**case_kwargs)
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    shutil.rmtree(save_dir)
Example #3
def test_model_workflow_partial_mode():
    """Run the workflow of deep forest with a local buffer."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": True})

    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier(**case_kwargs)
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
Example #4
def test_model_workflow_in_memory(backend):
    """Run the workflow of deep forest with in-memory mode."""

    case_kwargs = copy.deepcopy(kwargs)
    case_kwargs.update({"partial_mode": False})
    case_kwargs.update({"backend": backend})

    model = CascadeForestClassifier(**case_kwargs)
    model.fit(X_train, y_train)

    # Predictions before saving
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier(**case_kwargs)
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    shutil.rmtree(save_dir)
Example #5
def test_model_input_label_encoder():
    """Test if the model behaves the same with and without label encoding."""

    # Load data
    X, y = load_digits(return_X_y=True)
    y_as_str = np.char.add("label_", y.astype(str))

    # Train model on integer labels. Labels look like: 0, 1, 2, ...
    model = CascadeForestClassifier(random_state=1)
    model.fit(X, y)
    y_pred_int_labels = model.predict(X)

    # Train model on string labels. Labels look like: "label_0", "label_1", "label_2", ...
    model = CascadeForestClassifier(random_state=1)
    model.fit(X, y_as_str)
    y_pred_str_labels = model.predict(X)

    # Check if the underlying data are the same
    y_pred_int_labels_as_str = np.char.add(
        "label_", y_pred_int_labels.astype(str)
    )
    assert_array_equal(y_pred_str_labels, y_pred_int_labels_as_str)

    # Clean up buffer
    model.clean()
Example #6
def test_custom_cascade_layer_workflow_partial_mode():

    model = CascadeForestClassifier(partial_mode=True)

    n_estimators = 4
    estimators = [DecisionTreeClassifier() for _ in range(n_estimators)]
    model.set_estimator(estimators)  # set custom base estimators

    predictor = DecisionTreeClassifier()
    model.set_predictor(predictor)

    model.fit(X_train, y_train)
    y_pred_before = model.predict(X_test)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier()
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
Example #7
def test_classifier_custom_cascade_layer_workflow_in_memory():

    model = CascadeForestClassifier()

    n_estimators = 4
    estimators = [DecisionTreeClassifier() for _ in range(n_estimators)]
    model.set_estimator(estimators)  # set custom base estimators

    predictor = DecisionTreeClassifier()
    model.set_predictor(predictor)

    model.fit(X_train_clf, y_train_clf)
    y_pred_before = model.predict(X_test_clf)

    # Save and Reload
    model.save(save_dir)

    model = CascadeForestClassifier()
    model.load(save_dir)

    # Predictions after loading
    y_pred_after = model.predict(X_test_clf)

    # Make sure the same predictions before and after model serialization
    assert_array_equal(y_pred_before, y_pred_after)

    assert (model.get_estimator(0, 0, "custom") is
            model._get_layer(0).estimators_["0-0-custom"].estimator_)

    model.clean()  # clear the buffer
    shutil.rmtree(save_dir)
Example #8
               random_state=args.randomseed,
               n_jobs=-1)
    x_resampled, y_resampled = sm.fit_sample(X, y)
    # After oversampling, count the samples in each class
    np_resampled_y = np.asarray(np.unique(y_resampled, return_counts=True))
    df_resampled_y = pd.DataFrame(np_resampled_y.T, columns=['Class', 'Sum'])
    print("\nNumber of samples after oversampling:\n{0}\n".format(
        df_resampled_y))

    # Initialize the classifier
    clf = CascadeForestClassifier(random_state=args.randomseed)
    print("\nClassifier parameters:")
    print(clf.get_params())
    print("\nSMOTE parameters:")
    print(sm.get_params())
    print("\n")

    # Train on the SMOTE-resampled data
    clf.fit(x_resampled, y_resampled)
    # Predict on the test set
    y_pred = clf.predict(X_test)

    # Print evaluation metrics on the test set
    if num_categories > 2:
        model_evaluation(num_categories, y_test, y_pred)
    else:
        bi_model_evaluation(y_test, y_pred)
    end_time = time.time()  # record the end time
    print("\n[Finished in: {0:.6f} mins = {1:.6f} seconds]".format(
        ((end_time - start_time) / 60), (end_time - start_time)))
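Example #8 begins mid-call, so the construction of sm is not shown. Assuming sm is an imbalanced-learn SMOTE oversampler (consistent with the fit_sample call and the oversampling comments above), the missing construction presumably resembles the hypothetical sketch below; the argument values mirror the visible tail of the call and everything else is an assumption.

import argparse
import time

from imblearn.over_sampling import SMOTE

# Hypothetical stand-in for the script's command-line parsing
parser = argparse.ArgumentParser()
parser.add_argument("--randomseed", type=int, default=42)
args = parser.parse_args()

start_time = time.time()  # start time, paired with end_time at the bottom of the script

# Older imbalanced-learn releases accept n_jobs and expose fit_sample;
# newer releases rename it to fit_resample and drop the n_jobs argument.
sm = SMOTE(random_state=args.randomseed, n_jobs=-1)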
Example #9
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from deepforest import CascadeForestClassifier
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)

X = np.array(X)
y = np.array(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = CascadeForestClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred) * 100
print("Accuracy: {:.3f} %".format(acc))
Example #10
                                         n_trees=100,
                                         n_jobs=6,
                                         # use_predictor is a boolean flag; the
                                         # predictor type is a separate argument
                                         use_predictor=True,
                                         predictor="forest")

classification.fit(train_data, train_label.ravel())
prediction = classification.predict_proba(valx)
probability = [prob[1] for prob in prediction]
# Threshold the positive-class probability at 0.5 to get hard class labels
pre_class = [1 if prob > 0.5 else 0 for prob in probability]
prediction = prediction[:, 1]

pred = classification.predict(valx)

print(pred.shape)

print("训练集:", classification.score(train_data, train_label))
print("测试集:", classification.score(test_data, test_label))

a = classification.score(valx, valy)
p = precision_score(valy, pred)
acc = roc_auc_score(valy, pre_class)  # note: an AUC on the thresholded labels, not accuracy
roc = roc_auc_score(valy, prediction)  # AUC on the raw positive-class probabilities
f1_score = f1_score(valy, pred)  # note: rebinding shadows sklearn's f1_score function
cm = confusion_matrix(valy, pred)
recall_score = recall_score(valy, pred)  # note: rebinding shadows sklearn's recall_score function