def xgboost(data, target, test_data, test_target=None, features=None):
    """Fit an XGBoost classifier, report 10-fold CV stats, and score the test set.

    Parameters:
        data, target: training features and labels.
        test_data: features to predict on.
        test_target: optional true labels for the test set, forwarded to
            validation.cal_rmse.
        features: optional iterable of feature names; when given, feature
            importances are collected into a DataFrame.

    Returns:
        The value of validation.cal_rmse(prediction, test_target), for
        consistency with gradient_boosting(). (Previously returned None.)
    """
    # Removed dead debug code: a hard-coded `n = 406` that was printed but
    # never used by the model.
    model = XGBClassifier()
    model.fit(data, target)
    prediction = model.predict(test_data)

    # 10-fold cross-validation using the module-level `scorer`; the printed
    # labels suggest it is F1-based -- confirm against its definition.
    cv_score = cross_val_score(model, data, target, cv=10, scoring=scorer)
    print('F1 score = ' + str(cv_score.mean()))
    print('F1 score std = ' + str(cv_score.std()))

    # Feature importances into a dataframe.
    # TODO(review): this frame is built but never used or returned -- either
    # return it alongside the metric or drop the `features` parameter.
    if features is not None:
        feature_importances = pd.DataFrame(
            {'feature': features, 'importance': model.feature_importances_})

    return validation.cal_rmse(prediction, test_target)
def gradient_boosting(data, target, test_data, test_target=None):
    """Gradient-boosted classifier: fit on (data, target), report 10-fold
    cross-validation scores, and evaluate predictions on test_data via
    validation.cal_rmse (whose result is returned)."""
    clf = GradientBoostingClassifier(
        n_estimators=100, learning_rate=1.0, max_depth=24, random_state=0)
    clf.fit(data, target)
    predicted = clf.predict(test_data)

    scores = cross_val_score(clf, data, target, cv=10, scoring=scorer)
    print(f'F1 score = {scores.mean()}')
    print(f'F1 score std = {scores.std()}')

    return validation.cal_rmse(predicted, test_target)
def random_forest(data, target, test_data, test_target=None, features=None):
    """Fit a random forest, report 10-fold CV stats, and return predictions.

    n_estimators=408 and max_features=59 were chosen by an earlier manual
    sweep (the original comment noted 408 "seems best" over 407-410);
    random_state=10 pins the result for reproducibility.

    Parameters:
        data, target: training features and labels.
        test_data: features to predict on.
        test_target: optional true labels, forwarded to validation.cal_rmse.
        features: accepted for interface parity with xgboost() but currently
            unused (the feature-importance code it fed was removed as dead).

    Returns:
        (prediction_fit, prediction): predictions on the training data and
        on test_data, respectively.
    """
    n = 408
    print('n = ' + str(n))
    model = RandomForestClassifier(
        n_estimators=n, random_state=10, n_jobs=-1, max_features=59)
    model.fit(data, target)
    prediction_fit = model.predict(data)
    prediction = model.predict(test_data)

    # 10-fold cross-validation using the module-level `scorer`; the printed
    # labels suggest it is F1-based -- confirm against its definition.
    cv_score = cross_val_score(model, data, target, cv=10, scoring=scorer)
    print('F1 score = ' + str(cv_score.mean()))
    print('F1 score std = ' + str(cv_score.std()))

    validation.cal_rmse(prediction, test_target)
    return prediction_fit, prediction
def decision_tree_reg(data, target, test_data, test_target=None):
    """Decision-tree regressor: fit, report 10-fold CV scores, and return
    validation.cal_rmse on the test-set predictions.

    NOTE(review): the printout says "F1 score" but this is a regressor; the
    actual metric depends on the module-level `scorer` -- verify.
    """
    reg = DecisionTreeRegressor()
    reg.fit(data, target)
    predicted = reg.predict(test_data)

    scores = cross_val_score(reg, data, target, cv=10, scoring=scorer)
    print(f'F1 score = {scores.mean()}')
    print(f'F1 score std = {scores.std()}')

    return validation.cal_rmse(predicted, test_target)
def kneighbors_reg(data, target, test_data, test_target=None):
    """K-nearest-neighbours regressor (default hyperparameters): fit on the
    training split and return validation.cal_rmse over the test predictions."""
    reg = KNeighborsRegressor()
    reg.fit(data, target)
    predicted = reg.predict(test_data)
    return validation.cal_rmse(predicted, test_target)
def kneighbors(data, target, test_data, test_target=None):
    """K-nearest-neighbours classifier (default hyperparameters): fit on the
    training split and return validation.cal_rmse over the test predictions."""
    clf = KNeighborsClassifier()
    clf.fit(data, target)
    predicted = clf.predict(test_data)
    return validation.cal_rmse(predicted, test_target)
def logistic_regression(data, target, test_data, test_target=None):
    """Logistic-regression classifier (default hyperparameters): fit on the
    training split and return validation.cal_rmse over the test predictions."""
    clf = LogisticRegression()
    clf.fit(data, target)
    predicted = clf.predict(test_data)
    return validation.cal_rmse(predicted, test_target)
def linear_regression(data, target, test_data, test_target=None):
    """Ordinary least-squares regression: fit on the training split, run the
    RMSE evaluation for its side effects, and return the test predictions
    (unlike the sibling helpers, which return the metric)."""
    reg = LinearRegression()
    reg.fit(data, target)
    predicted = reg.predict(test_data)
    validation.cal_rmse(predicted, test_target)
    return predicted