def test_classifier(dataset):
    """Print test scores of the project booster and the scikit-learn booster.

    NOTE(review): the unconditional ``return`` right below disables this
    test, so none of the comparison code after it is executed.

    ``dataset`` is unpacked as ``(X_train, X_test, y_train, y_test)``.
    """
    return
    train_features, test_features, train_labels, test_labels = dataset

    # Project implementation: its ``fit`` is also given the held-out split.
    own_model = GradientBoostingClassifier(n_estimators=55, max_depth=10)
    own_model.fit(train_features, train_labels, test_features, test_labels)
    own_score = own_model.score(test_features, test_labels)
    print(own_score)

    # Reference implementation configured with the same depth and tree count.
    reference_model = SklearnGradientBoostingClassifier(n_estimators=55,
                                                        max_depth=10)
    reference_model.fit(train_features, train_labels)
    reference_score = reference_model.score(test_features, test_labels)
    print(reference_score)
示例#2
0
def test_gradient_boosting_classification():
    """Fit the gradient boosting classifier on iris and print its accuracy.

    Shapes of the full data set and of each split are printed along the way.
    """
    iris_bunch = datasets.load_iris()
    features = iris_bunch.data
    labels = iris_bunch.target
    print(features.shape, labels.shape)

    # split_train_test returns the parts in (train X, train y, test X, test y)
    # order, as unpacked here.
    fit_features, fit_labels, eval_features, eval_labels = split_train_test(
        features, labels)
    print(fit_features.shape, fit_labels.shape,
          eval_features.shape, eval_labels.shape)

    booster = GradientBoostingClassifier(learning_rate=0.1, n_estimators=100)
    booster.fit(fit_features, fit_labels)
    predicted_labels = booster.predict(eval_features)

    accuracy = cal_accuracy(eval_labels, predicted_labels)
    print('accuracy: ', accuracy)
def test_real_data(dataset):
    """Plot the staged test loss of the project booster against scikit-learn's.

    Both models are trained on the split returned by ``DataLoader().load()``;
    the ``dataset`` argument is accepted but not used. One curve per model is
    registered on a ``DashBoard`` and the board is plotted at the end.
    """
    X_train, X_test, y_train, y_test = DataLoader().load()

    n_estimators = 300

    # --- project implementation -------------------------------------------
    # max_features is not set for this model.
    own_settings = dict(
        n_estimators=n_estimators,
        max_depth=1,
        min_samples_split=20,
        min_samples_leaf=10,
        subsample=0.5,
        learning_rate=0.2,
    )
    own_model = GradientBoostingClassifier(**own_settings)
    own_model.fit(X_train, y_train.reshape(-1))
    own_staged = own_model.staged_predict_proba(X_test)
    own_curve = loss(y_test.reshape(-1), own_staged)

    board = DashBoard()
    board.init_graph(
        name="my_one",
        title="my_one",
        line_type="default",
        c="r",
        y=own_curve.reshape(-1),
    )

    # --- scikit-learn baseline --------------------------------------------
    # max_features is not set for this model either.
    sklearn_settings = dict(
        n_estimators=n_estimators,
        max_depth=1,
        criterion="mse",
        min_samples_split=20,
        min_samples_leaf=10,
        subsample=0.5,
        learning_rate=0.1,
        init="zero",
    )
    sklearn_model = SklearnGradientBoostingClassifier(**sklearn_settings)
    sklearn_model.fit(X_train, y_train)

    # One row per boosting stage, holding column 1 of that stage's
    # predict_proba output for every test sample.
    sklearn_staged = np.empty([n_estimators, X_test.shape[0]])
    for stage, stage_proba in enumerate(
            sklearn_model.staged_predict_proba(X_test)):
        sklearn_staged[stage] = stage_proba[:, 1]
    print(y_test.shape, sklearn_staged.shape)
    sklearn_curve = loss(y_test.reshape(-1), sklearn_staged)

    board.init_graph(
        name="baseline",
        title="sklearn",
        line_type="baseline",
        c="g",
        y=sklearn_curve.reshape(-1),
    )
    board.make_plot()
示例#4
0
文件: main.py 项目: Maen1/iium
def main():
    """Train a gradient boosting classifier on ``cancer_o.csv`` and print accuracy.

    Reads the CSV from the current working directory, uses every column except
    ``level`` and ``patient_id`` as features, encodes ``level`` as an integer
    class index, holds out 20% of the rows for testing and prints the accuracy
    on that held-out part.

    Raises:
        ValueError: if ``level`` contains a value other than "Low", "Medium"
            or "High" (raised by ``list.index`` during label encoding).
    """
    print("-- Gradient Boosting Classification --")

    df = pd.read_csv("cancer_o.csv")
    X = df.drop(['level', 'patient_id'], axis=1)

    # Encode the textual level as its position in this list
    # (Low -> 0, Medium -> 1, High -> 2). The earlier `y = df.level`
    # assignment was overwritten before being read and has been dropped.
    data_classes = ["Low", "Medium", "High"]
    y = df['level'].apply(data_classes.index)

    # Fixed random_state keeps the train/test split reproducible.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42)

    clf = GradientBoostingClassifier()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)
示例#5
0
# Element-wise 2*y - 1 on the test labels.
# NOTE(review): presumably maps 0/1 labels to -1/+1 for Adaboost — confirm
# against how y_test is built earlier in the script.
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
# Construct one estimator per algorithm; the fit calls are in the TRAIN
# section below.
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=50)
support_vector_machine = SupportVectorMachine()
lda = LDA()
gbc = GradientBoostingClassifier(n_estimators=20, learning_rate=1)
xgboost = XGBoost()

# ........
#  TRAIN
# ........
# Each fit is preceded by a tab-indented progress line naming the model.
print ("Training:")
print ("\tAdaboost")
# Adaboost is the only model fitted on rescaled_y_train (defined outside the
# lines shown here); the others below use y_train.
adaboost.fit(X_train, rescaled_y_train)
print ("\tDecision Tree")
decision_tree.fit(X_train, y_train)
print ("\tGradient Boosting")
gbc.fit(X_train, y_train)
print ("\tLDA")
lda.fit(X_train, y_train)
# NOTE(review): the fit call matching this progress line is not visible here.
print ("\tLogistic Regression")
示例#6
0
# .......
#  SETUP
# .......
# Construct one estimator per algorithm; the fit calls are in the TRAIN
# section below.
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20, n_iterations=20000, learning_rate=0.1)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=50)
support_vector_machine = SupportVectorMachine()
lda = LDA()
# Both boosted models use 50 estimators of depth 2 but different learning
# rates (0.9 vs 0.5).
gbc = GradientBoostingClassifier(n_estimators=50,
                                 learning_rate=.9,
                                 max_depth=2)
xgboost = XGBoost(n_estimators=50, learning_rate=0.5, max_depth=2)

# ........
#  TRAIN
# ........
# Each fit is preceded by a tab-indented progress line naming the model.
print("Training:")
print("\tAdaboost")
# Adaboost is the only model fitted on rescaled_y_train (defined outside the
# lines shown here); the others below use y_train.
adaboost.fit(X_train, rescaled_y_train)
print("\tDecision Tree")
decision_tree.fit(X_train, y_train)
print("\tGradient Boosting")
gbc.fit(X_train, y_train)
print("\tLDA")
lda.fit(X_train, y_train)
示例#7
0
# Element-wise 2*y - 1 on the test labels.
# NOTE(review): presumably maps 0/1 labels to -1/+1 for Adaboost — confirm
# against how y_test is built earlier in the script.
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
# Construct one estimator per algorithm; the fit calls are in the TRAIN
# section below.
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20, n_iterations=20000, learning_rate=0.1)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=50)
support_vector_machine = SupportVectorMachine()
lda = LDA()
# Both boosted models use 50 estimators; only gbc sets max_depth explicitly.
gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=.9, max_depth=2)
xgboost = XGBoost(n_estimators=50, learning_rate=0.5)

# ........
#  TRAIN
# ........
# Each fit is preceded by a tab-indented progress line naming the model.
print ("Training:")
print ("\tAdaboost")
# Adaboost is the only model fitted on rescaled_y_train (defined outside the
# lines shown here); the others below use y_train.
adaboost.fit(X_train, rescaled_y_train)
print ("\tDecision Tree")
decision_tree.fit(X_train, y_train)
print ("\tGradient Boosting")
gbc.fit(X_train, y_train)
print ("\tLDA")
lda.fit(X_train, y_train)
# NOTE(review): the fit call matching this progress line is not visible here.
print ("\tLogistic Regression")
示例#8
0
# Element-wise 2*y - 1 on the test labels.
# NOTE(review): presumably maps 0/1 labels to -1/+1 for Adaboost — confirm
# against how y_test is built earlier in the script.
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
# Construct one estimator per algorithm; the fit calls are in the TRAIN
# section below.
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = ClassificationTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine()
lda = LDA()
# Gradient boosting is built with the constructor's default arguments.
gbc = GradientBoostingClassifier()

# ........
#  TRAIN
# ........
# Each fit is preceded by a tab-indented progress line naming the model.
print("Training:")
print("\tAdaboost")
# Adaboost is the only model fitted on rescaled_y_train (defined outside the
# lines shown here); the others below use y_train.
adaboost.fit(X_train, rescaled_y_train)
print("\tDecision Tree")
decision_tree.fit(X_train, y_train)
print("\tGradient Boosting")
gbc.fit(X_train, y_train)
print("\tLDA")
lda.fit(X_train, y_train)
print("\tLogistic Regression")
logistic_regression.fit(X_train, y_train)