Пример #1
0
def xgboost(data, target, test_data, test_target=None, features=None):
    """Fit a default ``XGBClassifier``, cross-validate it and score its test predictions.

    The classifier is trained on ``data``/``target`` and used to predict
    ``test_data``. A 10-fold ``cross_val_score`` over the training data,
    scored with the module-level ``scorer``, is printed as a mean and a
    standard deviation under the label "F1 score". The test predictions are
    then passed to ``validation.cal_rmse`` together with ``test_target``.

    Args:
        data: Training feature matrix.
        target: Training labels.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.
        features: Unused; kept so existing callers that pass it keep
            working. It previously fed a feature-importance DataFrame that
            was built and immediately discarded.

    Returns:
        None. The result of ``validation.cal_rmse`` is not propagated.
    """
    # NOTE(review): this value is only logged; the classifier below is created
    # with its default settings and never receives it -- confirm the intent.
    n = 406
    print('n = ' + str(n))

    model = XGBClassifier()
    model.fit(data, target)
    prediction = model.predict(test_data)

    # Cross-validation runs on the training data only; the test set is not involved.
    cv_score = cross_val_score(model, data, target, cv=10, scoring=scorer)
    print('F1 score = ' + str(cv_score.mean()))
    print('F1 score std = ' + str(cv_score.std()))

    validation.cal_rmse(prediction, test_target)
Пример #2
0
def gradient_boosting(data, target, test_data, test_target=None):
    """Train a ``GradientBoostingClassifier`` and score its test predictions.

    The classifier is fitted on ``data``/``target`` and used to predict
    ``test_data``. A 10-fold ``cross_val_score`` over the training data,
    scored with the module-level ``scorer``, is printed as a mean and a
    standard deviation under the label "F1 score".

    Args:
        data: Training feature matrix.
        target: Training labels.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.

    Returns:
        Whatever ``validation.cal_rmse(prediction, test_target)`` returns
        (presumably an RMSE value, judging by the helper's name).
    """
    booster = GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=1.0,
        max_depth=24,
        random_state=0,
    )
    booster.fit(data, target)
    test_predictions = booster.predict(test_data)

    # Cross-validation runs on the training data only.
    fold_scores = cross_val_score(booster, data, target, cv=10, scoring=scorer)
    print('F1 score = %s' % fold_scores.mean())
    print('F1 score std = %s' % fold_scores.std())

    return validation.cal_rmse(test_predictions, test_target)
Пример #3
0
def random_forest(data, target, test_data, test_target=None, features=None):
    """Train a ``RandomForestClassifier`` and return train and test predictions.

    The forest is fitted on ``data``/``target`` and then used to predict both
    the training data and ``test_data``. A 10-fold ``cross_val_score`` over
    the training data, scored with the module-level ``scorer``, is printed as
    a mean and a standard deviation under the label "F1 score". The test
    predictions are also passed to ``validation.cal_rmse``, whose result is
    discarded.

    Args:
        data: Training feature matrix.
        target: Training labels.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.
        features: Not used by this function.

    Returns:
        A ``(prediction_fit, prediction)`` tuple: the predictions on the
        training data followed by the predictions on ``test_data``.
    """
    # Tree count picked empirically: 408 seemed best together with
    # max_features=59 among the values that were tried.
    tree_count = 408
    print('n = %s' % tree_count)

    forest = RandomForestClassifier(
        n_estimators=tree_count,
        random_state=10,
        n_jobs=-1,
        max_features=59,
    )
    forest.fit(data, target)
    train_predictions = forest.predict(data)
    test_predictions = forest.predict(test_data)

    # Cross-validation runs on the training data only.
    fold_scores = cross_val_score(forest, data, target, cv=10, scoring=scorer)
    print('F1 score = %s' % fold_scores.mean())
    print('F1 score std = %s' % fold_scores.std())

    validation.cal_rmse(test_predictions, test_target)
    return train_predictions, test_predictions
Пример #4
0
def decision_tree_reg(data, target, test_data, test_target=None):
    """Train a default ``DecisionTreeRegressor`` and score its test predictions.

    The regressor is fitted on ``data``/``target`` and used to predict
    ``test_data``. A 10-fold ``cross_val_score`` over the training data,
    scored with the module-level ``scorer``, is printed as a mean and a
    standard deviation.

    NOTE(review): the printed label reads "F1 score" although the model is a
    regressor; the value is whatever ``scorer`` computes -- confirm the label.

    Args:
        data: Training feature matrix.
        target: Training targets.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.

    Returns:
        Whatever ``validation.cal_rmse(prediction, test_target)`` returns
        (presumably an RMSE value, judging by the helper's name).
    """
    tree = DecisionTreeRegressor()
    tree.fit(data, target)
    test_predictions = tree.predict(test_data)

    # Cross-validation runs on the training data only.
    fold_scores = cross_val_score(tree, data, target, cv=10, scoring=scorer)
    print('F1 score = %s' % fold_scores.mean())
    print('F1 score std = %s' % fold_scores.std())

    return validation.cal_rmse(test_predictions, test_target)
Пример #5
0
def kneighbors_reg(data, target, test_data, test_target=None):
    """Fit a default ``KNeighborsRegressor`` and score its test predictions.

    Args:
        data: Training feature matrix.
        target: Training targets.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.

    Returns:
        Whatever ``validation.cal_rmse(prediction, test_target)`` returns
        (presumably an RMSE value, judging by the helper's name).
    """
    regressor = KNeighborsRegressor()
    regressor.fit(data, target)
    predicted = regressor.predict(test_data)
    return validation.cal_rmse(predicted, test_target)
Пример #6
0
def kneighbors(data, target, test_data, test_target=None):
    """Fit a default ``KNeighborsClassifier`` and score its test predictions.

    Args:
        data: Training feature matrix.
        target: Training labels.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.

    Returns:
        Whatever ``validation.cal_rmse(prediction, test_target)`` returns
        (presumably an RMSE value, judging by the helper's name).
    """
    classifier = KNeighborsClassifier()
    classifier.fit(data, target)
    predicted_labels = classifier.predict(test_data)
    return validation.cal_rmse(predicted_labels, test_target)
Пример #7
0
def logistic_regression(data, target, test_data, test_target=None):
    """Fit a default ``LogisticRegression`` and score its test predictions.

    Args:
        data: Training feature matrix.
        target: Training labels.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.

    Returns:
        Whatever ``validation.cal_rmse(prediction, test_target)`` returns
        (presumably an RMSE value, judging by the helper's name).
    """
    classifier = LogisticRegression()
    classifier.fit(data, target)
    predicted_labels = classifier.predict(test_data)
    return validation.cal_rmse(predicted_labels, test_target)
Пример #8
0
def linear_regression(data, target, test_data, test_target=None):
    """Fit a default ``LinearRegression`` and return its test predictions.

    The predictions are also passed to ``validation.cal_rmse`` together with
    ``test_target``; that helper's return value is discarded here.

    Args:
        data: Training feature matrix.
        target: Training targets.
        test_data: Feature matrix to predict on.
        test_target: Reference values for ``test_data``. Forwarded unchanged
            to ``validation.cal_rmse``, including when left as ``None``.

    Returns:
        The model's predictions for ``test_data``.
    """
    regressor = LinearRegression()
    regressor.fit(data, target)
    predicted = regressor.predict(test_data)

    # Only the raw predictions go back to the caller, not the helper's result.
    validation.cal_rmse(predicted, test_target)
    return predicted