示例#1
0
def rotationforest(number):
    """Train a rotation forest of ``number`` estimators and print its test metrics.

    The data is not passed in: the module-level names ``X_train``,
    ``y_train``, ``X_test`` and ``y_test`` are read directly. The confusion
    matrix and then the accuracy of the test-set predictions are printed.
    """
    print("Rotation Forest estimators=%d" % number)

    forest = RotationForestClassifier(n_estimators=number)
    forest.fit(X_train, y_train)
    predictions = forest.predict(X_test)

    # Same two reports, in the same order: confusion matrix, then accuracy.
    for metric in (confusion_matrix, accuracy_score):
        print(metric(y_test, predictions))
示例#2
0
class RotationForest():
    """Thin wrapper around a default-configured ``RotationForestClassifier``.

    Targets are supplied as an object indexable by ``"label"`` whose values
    support ``.astype('category').cat.codes`` (presumably a pandas
    DataFrame -- confirm against callers); the labels are turned into
    integer category codes before they reach the estimator.
    """

    def __init__(self):
        super(RotationForest, self).__init__()
        self.rf = RotationForestClassifier()

    @staticmethod
    def _encode_labels(targets):
        """Return the ``"label"`` column of ``targets`` as category codes."""
        # NOTE(review): codes are derived independently on every call, so
        # train and test codes only line up when both contain the same set
        # of label values -- confirm this holds for the callers.
        return targets["label"].astype('category').cat.codes

    def train_step(self, X, y_train, plot=False):
        """Fit the wrapped forest on ``X`` and the encoded labels.

        ``plot`` is accepted but not used by this method.
        """
        self.rf.fit(X, self._encode_labels(y_train))

    def test_forced(self, X, y_test):
        """Return ``(class probabilities for X, encoded test labels)``."""
        encoded = self._encode_labels(y_test)
        return self.rf.predict_proba(X), encoded
示例#3
0
def svm(x_train, y_train, x_test, y_test):
    """Fit a rotation forest, pickle it, and print hold-out metrics.

    Despite the function name and the ``svm-knn.pkl`` file name, the
    estimator trained here is a ``RotationForestClassifier``. The fitted
    model is written to ``./data/svm-knn.pkl`` before the test set is
    scored, and every metric is computed from the hard predictions
    returned by ``predict``.
    """
    classifier = RotationForestClassifier(n_estimators=100,
                                          random_state=47,
                                          verbose=4)
    classifier.fit(x_train, y_train)

    # Persist first, so the model is on disk even if scoring fails later.
    with open('./data/svm-knn.pkl', 'wb') as handle:
        pickle.dump(classifier, handle)

    y_predict = classifier.predict(x_test)

    # (caption, scorer) pairs in the order they are reported.
    # NOTE(review): `auc`, `sensitivity` and `specificity` are resolved from
    # the enclosing module; their definitions are not visible here.
    metric_table = (
        ("Accuracy: ", accuracy_score),
        ("MCC: ", matthews_corrcoef),
        ("Precision: ", precision_score),
        ("ROC auc score: ", roc_auc_score),
        ("AUC def: ", auc),
        ("F1 score: ", f1_score),
        ("Sensitivity: ", sensitivity),
        ("Specifity: ", specificity),
    )

    print("****************************************")
    for caption, scorer in metric_table:
        print(caption, scorer(y_test, y_predict))
    def test_rotation_forest(self):
        """ Smoke test: fit on a 70/30 split and sanity-check both outputs """
        features, labels = classification_data()
        x_fit, x_held, y_fit, _ = train_test_split(
            features, labels, test_size=0.3, random_state=77)

        forest = RotationForestClassifier(random_state=1234)
        forest.fit(x_fit, y_fit)
        n_held = x_held.shape[0]

        # One probability row per held-out sample, every entry within [0, 1].
        probabilities = forest.predict_proba(x_held)
        assert probabilities.shape[0] == n_held
        assert np.all(probabilities <= 1)
        assert np.all(probabilities >= 0)

        # One prediction per held-out sample, and both classes are predicted.
        predicted = forest.predict(x_held)
        assert predicted.shape[0] == n_held
        assert np.unique(predicted).tolist() == [0, 1]
示例#5
0
def cross_val(x_train, y_train):
    """Run ten passes of 10-fold stratified CV and print the mean metrics.

    A single ``RotationForestClassifier`` is refit on every fold. After all
    passes, the model as fitted on the last fold is pickled to
    ``./data/rotation_forest_knn_human100.pkl`` and the mean of each metric
    over all folds is printed.

    ``x_train`` and ``y_train`` are indexed with the integer index arrays
    yielded by ``StratifiedKFold.split``, so they must support that kind of
    indexing.
    """
    splitter = StratifiedKFold(n_splits=10)
    forest = RotationForestClassifier(n_estimators=100,
                                      random_state=47,
                                      verbose=4,
                                      n_jobs=-2)

    # Scorers in the order they are evaluated on each fold.
    fold_scorers = (
        ("accuracy", accuracy_score),
        ("mcc", matthews_corrcoef),
        ("precision", precision_score),
        ("f1", f1_score),
        ("roc_auc", roc_auc_score),
        ("sensitivity", sensitivity),
        ("specificity", specificity),
    )
    history = {key: [] for key, _ in fold_scorers}
    # Collected on every fold but never reported.
    estimator_scores = []

    for _ in range(10):
        for fit_idx, held_idx in splitter.split(x_train, y_train):
            fold_x, held_x = x_train[fit_idx], x_train[held_idx]
            fold_y, held_y = y_train[fit_idx], y_train[held_idx]

            forest.fit(fold_x, fold_y)
            predicted = forest.predict(held_x)
            estimator_scores.append(forest.score(held_x, held_y))

            for key, scorer in fold_scorers:
                history[key].append(scorer(held_y, predicted))

    with open('./data/rotation_forest_knn_human100.pkl', 'wb') as handle:
        pickle.dump(forest, handle)

    print("****************************************")
    print("Accuracy: ", np.mean(history["accuracy"]))
    print("MCC: ", np.mean(history["mcc"]))
    print("Precision: ", np.mean(history["precision"]))
    print("Roc auc score: ", np.mean(history["roc_auc"]))
    print("F1 score: {}\n".format(np.mean(history["f1"])))
    print("Sensitivity: ", np.mean(history["sensitivity"]))
    print("Specifity: ", np.mean(history["specificity"]))
            'auc_mean': auc_mean,
            'auc_std': auc_std}

classifiers = [('Random Forest',
               RandomForestClassifier(random_state=12, n_estimators=25)),
              ('PCA + Random Forest',
               make_pipeline(PCA(), RandomForestClassifier(random_state=12,
                                                           n_estimators=25))),
              ('Rotation Tree',
               RotationTreeClassifier(random_state=12,
                                      n_features_per_subset=3)),
              ('Decision Tree',
               DecisionTreeClassifier(random_state=12)),
              ('Rotation Forest (PCA)',
               RotationForestClassifier(random_state=12,
                                        n_estimators=25,
                                        n_features_per_subset=3)),
              ('Rotation Forest (Randomized PCA)',
               RotationForestClassifier(random_state=12,
                                        n_estimators=25,
                                        n_features_per_subset=3,
                                        rotation_algo='randomized')),
              ('Adaboost (Rotation Tree)',
               AdaBoostClassifier(RotationTreeClassifier(n_features_per_subset=3,
                                                         random_state=12,
                                                         max_depth=3),
                                  n_estimators=25,
                                  random_state=12)
              ),
              ('Adaboost (Decision Tree)',
               AdaBoostClassifier(DecisionTreeClassifier(random_state=12,
示例#7
0
def robust_cross_val(x_train, y_train, x_test, y_test, folds):
    """Cross-validate a rotation forest, then score it on an independent set.

    Ten passes of ``folds``-fold stratified cross-validation are run on
    ``x_train`` / ``y_train`` with a single ``RotationForestClassifier`` that
    is refit on every fold. The model as fitted on the last fold is pickled
    to ``../data/rotation_forest_human.pkl`` and is also the one used to
    predict ``x_test``. A text report with the mean cross-validation metrics
    and the independent-test metrics is appended to
    ``../data/rotation_forest_human_result.txt``.

    Args:
        x_train: Training features; indexed with the integer index arrays
            yielded by ``StratifiedKFold.split``.
        y_train: Training labels; indexed the same way as ``x_train``.
        x_test: Independent test features.
        y_test: Independent test labels.
        folds: Number of stratified folds per pass.
    """
    # `random_state` is deliberately not passed: without `shuffle=True` it has
    # no effect on the splits, and current scikit-learn releases raise
    # ValueError for that combination. Leaving it out keeps the unshuffled
    # splits this function has always produced.
    skf = StratifiedKFold(n_splits=folds)

    model = RotationForestClassifier(n_estimators=100,
                                     random_state=47,
                                     verbose=4,
                                     n_jobs=-2)
    accuracy = []
    mcc = []
    precision = []
    roc_auc = []
    Sensitivity = []
    Specificity = []
    auc_score = []
    f1 = []
    score = []  # collected on every fold but not part of the report

    # NOTE(review): the splitter is unshuffled and the model seed is fixed,
    # so each of the ten passes presumably repeats the same folds -- confirm
    # whether repeated *shuffled* CV was intended.
    for _ in range(10):
        for train_index, test_index in skf.split(x_train, y_train):
            X_train, X_test = x_train[train_index], x_train[test_index]
            Y_train, Y_test = y_train[train_index], y_train[test_index]

            model.fit(X_train, Y_train)
            y_predict = model.predict(X_test)
            score.append(model.score(X_test, Y_test))

            accuracy.append(accuracy_score(Y_test, y_predict))
            mcc.append(matthews_corrcoef(Y_test, y_predict))
            precision.append(precision_score(Y_test, y_predict))
            roc_auc.append(roc_auc_score(Y_test, y_predict))
            auc_score.append(auc(Y_test, y_predict))
            f1.append(f1_score(Y_test, y_predict))
            Sensitivity.append(sensitivity(Y_test, y_predict))
            Specificity.append(specificity(Y_test, y_predict))

    with open('../data/rotation_forest_human.pkl', 'wb') as f:
        pickle.dump(model, f)

    # Build the report as a list of pieces and join once at the end.
    report = [
        "{} folds\n".format(folds),
        "******************** Cross Validation Score ********************\n",
        "Accuracy: {}\n".format(np.mean(accuracy)),
        "MCC: {}\n".format(np.mean(mcc)),
        "Precision: {}\n".format(np.mean(precision)),
        "Roc AUC score: {}\n".format(np.mean(roc_auc)),
        "AUC score: {}\n".format(np.mean(auc_score)),
        "F1 score: {}\n".format(np.mean(f1)),
        "Sensitivity: {}\n".format(np.mean(Sensitivity)),
        "Specifity: {}\n".format(np.mean(Specificity)),
    ]

    # The independent test uses the model exactly as the last fold left it.
    y_test_predict = model.predict(x_test)
    report += [
        "\n******************** Independent Test Score ********************\n",
        "Accuracy: {}\n".format(accuracy_score(y_test, y_test_predict)),
        "MCC: {}\n".format(matthews_corrcoef(y_test, y_test_predict)),
        "Precision: {}\n".format(precision_score(y_test, y_test_predict)),
        "Roc AUC score: {}\n".format(roc_auc_score(y_test, y_test_predict)),
        "AUC score: {}\n".format(auc(y_test, y_test_predict)),
        "F1 score: {}\n".format(f1_score(y_test, y_test_predict)),
        "Sensitivity: {}\n".format(sensitivity(y_test, y_test_predict)),
        "Specifity: {}\n\n\n".format(specificity(y_test, y_test_predict)),
    ]

    with open('../data/rotation_forest_human_result.txt', 'a') as f:
        f.write("".join(report))
示例#8
0
if __name__ == '__main__':
    # Benchmark script: for every file in ./datasets/, compare a rotation
    # forest against a 10-tree random forest with K-fold stratified
    # cross-validation and print mean +/- std accuracy (in percent) for each.

    dataset_dir = './datasets/'
    for dataset in os.listdir(dataset_dir):
        name = dataset.split('.')[0]

        X, Y = read_uci_dataset(os.path.join(dataset_dir, dataset))

        K = 10
        cv = StratifiedKFold(Y, K)

        # Single-argument parenthesised print behaves the same on
        # Python 2 and Python 3.
        print(name)
        for clf in [
                RotationForestClassifier(),
                RandomForestClassifier(n_estimators=10)
        ]:
            # Start a fresh score list for each classifier; sharing one list
            # across classifiers mixed the first model's fold scores into
            # the second model's mean and standard deviation.
            accuracy = []
            for train, test in cv:

                x_train, x_test, y_train, y_test = X[train, :], X[
                    test, :], Y[train], Y[test]

                clf = clf.fit(x_train, y_train)
                y_pred = clf.predict(x_test)
                accuracy.append(accuracy_score(y_test, y_pred))

            bd_std = np.std(accuracy)
            bd_acc = np.mean(accuracy)
            print('{0}: {1:2f} +/- {2:2f}'.format(clf.__class__.__name__,
                                                  bd_acc * 100, bd_std * 100))
    def test_warm_start(self):
        """ Growing a forest in two warm-start steps (5, then 10 estimators)
            must give the right size at each step and match a single
            10-estimator fit.
        """
        X, y = classification_data()

        incremental = None
        for target_size in (5, 10):
            if incremental is not None:
                # Later step: enlarge the existing forest.
                incremental.set_params(n_estimators=target_size)
            else:
                # First step: create the warm-start forest.
                incremental = RotationForestClassifier(
                    n_estimators=target_size,
                    random_state=1234,
                    warm_start=True)
            incremental.fit(X, y)
            assert len(incremental) == target_size

        one_shot = RotationForestClassifier(n_estimators=10,
                                            random_state=1234,
                                            warm_start=False)
        one_shot.fit(X, y)

        # Both forests must hold trees with the same set of seeds ...
        incremental_seeds = {tree.random_state for tree in incremental}
        one_shot_seeds = {tree.random_state for tree in one_shot}
        assert incremental_seeds == one_shot_seeds

        # ... and give identical results from apply().
        npt.assert_array_equal(incremental.apply(X), one_shot.apply(X))
 def test_error_unkown_algo(self):
     """ Fitting with an unrecognised rotation_algo must raise ValueError """
     features, labels = classification_data()
     forest = RotationForestClassifier(rotation_algo='cat',
                                       random_state=1234)
     # The constructor call sits outside the `raises` block; only fit()
     # is expected to raise.
     with pytest.raises(ValueError):
         forest.fit(features, labels)
示例#11
0
    c_dir = Path('../')
    dataset_dir = (c_dir / 'datasets') / 'classification_data'
    print(c_dir.absolute())
    for dataset in dataset_dir.glob('*.data'):
        name = dataset.name.split('.')[0]

        X, Y = read_uci_dataset(dataset)

        k = 10
        accuracy = []
        skf = StratifiedKFold(
            n_splits=k, shuffle=True, random_state=1234
        )

        print(name)
        for clf in [RotationForestClassifier(), RandomForestClassifier(n_estimators=10)]:
            #for train, test in cv:
            for train_index, test_index in skf.split(X, Y):

                #x_train, x_test, y_train, y_test = X[train,:], X[test,:], Y[train], Y[test]
                x_train, x_test = X[train_index], X[test_index]
                y_train, y_test = Y[train_index], Y[test_index]

                clf = clf.fit(x_train, y_train)
                y_pred = clf.predict(x_test)
                accuracy.append(accuracy_score(y_test, y_pred))

            bd_std = np.std(accuracy)
            bd_acc = np.mean(accuracy)
            print(
                '{0}: {1:2f} +/- {2:2f}'.format(
示例#12
0
 def __init__(self):
     """Run the parent initialiser and create the wrapped classifier."""
     super(RotationForest, self).__init__()
     # Underlying estimator, constructed with its default arguments.
     self.rf = RotationForestClassifier()
示例#13
0
                                      subsample=class_sample_rate,
                                      colsample_bytree=max_features /
                                      foldData.shape[1],
                                      random_state=0)
                lgbm.fit(trainingData, trainingLabel)
                lgbm_result = lgbm.predict(testingData)
                f1_weighted_lightgbm_save.append(
                    f1_score(testingLabel, lgbm_result, average='weighted'))
                f1_macro_lightgbm_save.append(
                    f1_score(testingLabel, lgbm_result, average='macro'))
                f1_micro_lightgbm_save.append(
                    f1_score(testingLabel, lgbm_result, average='micro'))

                rot_forest = RotationForestClassifier(
                    n_estimators=n_estimators,
                    max_depth=max_depth,
                    max_features=max_features,
                    random_state=0)
                rot_forest.fit(trainingData, trainingLabel)
                rot_forest_result = rot_forest.predict(testingData)
                f1_weighted_rotation_forest_save.append(
                    f1_score(testingLabel,
                             rot_forest_result,
                             average='weighted'))
                f1_macro_rotation_forest_save.append(
                    f1_score(testingLabel, rot_forest_result, average='macro'))
                f1_micro_rotation_forest_save.append(
                    f1_score(testingLabel, rot_forest_result, average='micro'))

                svm_classifier = SVC(kernel='rbf')
                svm_classifier.fit(trainingData, trainingLabel)