def make_svr_pred(df, next_week, debug=0):
    """
    This method creates predictions using support vector regression.
    """
    #Tuned##
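    # `rand_space` and `space` are candidate grids for the commented-out
    # random_search/grid_search tuning calls below; `params` holds the
    # values used for the final fit, and `params_old` keeps an earlier
    # parameter set.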
    rand_space = {
        'estimator__kernel': ['linear', 'rbf', 'sigmoid'],
        'estimator__gamma': ['auto', 1e-10, 1e-6, 0.9],
        'estimator__epsilon': [1e-10, 1e-6, 0.1, 1],
        'estimator__C': [1e-2, 1, 10],
        'estimator__shrinking': [True, False],
        'estimator__max_iter': [-1, 1, 5, 10, 100, 1000]
    }

    space = {
        'estimator__kernel': ['linear', 'rbf', 'sigmoid'],
        'estimator__gamma': ['auto'],
        'estimator__epsilon': [1e-10, 1e-9, 1e-8, 1e-7, 1e-6],
        'estimator__C': [4, 5, 6],
        'estimator__shrinking': [False],
        'estimator__max_iter': [19, 20, 21]
    }

    params_old = {
        'estimator__kernel': 'rbf',
        'estimator__gamma': 'auto',
        'estimator__epsilon': 1e-8,
        'estimator__C': 5,
        'estimator__shrinking': False,
        'estimator__max_iter': 20
    }
    params = {
        'estimator__kernel': 'linear',
        'estimator__gamma': 'auto',
        'estimator__epsilon': 1e-10,
        'estimator__C': 1e-2,
        'n_jobs': -1,
        'estimator__shrinking': False,
        'estimator__max_iter': -1
    }
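    # process_data is assumed to return train/test splits of the feature
    # and multi-output target frames; only the training split is fit on.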
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_svr = MultiOutputRegressor(SVR())
    multi_svr.set_params(**params)
    #best_random = random_search(multi_svr, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_svr, space, next_week, 3, X_train, Y_train)
    multi_svr.fit(X_train, Y_train)
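    # Write the predicted target columns directly into next_week (in place).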
    next_week[Y_train.columns] = multi_svr.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_svr.predict(X_train)
        print(next_week)
        print("Score: ", multi_svr.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_svr,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week

def make_lr_pred(df, next_week, debug=0):
    """
    This method creates predictions using linear regression.
    """
    #Tuned
    space = {
        'estimator__fit_intercept': [True, False],
        'estimator__normalize': [True, False]
    }
    params = {'estimator__fit_intercept': True, 'estimator__normalize': False}
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_lr = MultiOutputRegressor(LinearRegression())
    #best_random = grid_search(multi_lr, space, next_week, 10, X_train, Y_train)
    multi_lr.set_params(**params)
    multi_lr.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_lr.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_lr.predict(X_train)
        print(next_week)
        print("Score: ", multi_lr.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_lr,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week

def make_lasso_pred(df, next_week, debug=0):
    """
    This method makes predictions using lasso regression.
    """
    #Tuned##
    rand_space = {
        'estimator__alpha': [900, 1000, 1100],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False],
        'estimator__positive': [True, False],
        'estimator__max_iter': [10000, 50000, 100000]
    }

    space = {
        'estimator__alpha': [3, 4, 5],
        'estimator__normalize': [True],
        'estimator__fit_intercept': [True],
        'estimator__positive': [False],
        'estimator__max_iter': [1]
    }
    params_old = {
        'estimator__alpha': 3,
        'estimator__normalize': True,
        'estimator__fit_intercept': True,
        'estimator__positive': False,
        'estimator__max_iter': 1
    }
    params = {
        'estimator__alpha': 10,
        'estimator__normalize': False,
        'estimator__fit_intercept': True,
        'n_jobs': -1,
        'estimator__positive': False,
        'estimator__max_iter': 750
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_lasso = MultiOutputRegressor(Lasso())
    multi_lasso.set_params(**params)
    #best_random = random_search(multi_lasso, rand_space, next_week, 50, 3, X_train, Y_train)
    #best_random = grid_search(multi_lasso, space, next_week, 3, X_train, Y_train)
    multi_lasso.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_lasso.predict(
        next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_lasso.predict(X_train)
        print(next_week)
        print("Score: ", multi_lasso.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_lasso,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week

def make_elastic_pred(df, next_week, debug=0):
    """
    This method creates predictions using elastic net regression.
    """
    #Tuned##
    rand_space = {
        'estimator__alpha': [1e-1],
        'estimator__l1_ratio': [0.7, 0.8],
        'estimator__fit_intercept': [True],
        'estimator__normalize': [True],
        'estimator__precompute': [False],
        'estimator__positive': [True],
        'estimator__max_iter': [11000, 12000, 13000],
        'estimator__selection': ['random']
    }
    space = {
        'estimator__alpha': [1e-5, 1e-1, 1, 10],
        'estimator__l1_ratio': [0, 0.25, 0.5, 0.75, 1],
        'estimator__fit_intercept': [True, False],
        'estimator__normalize': [True, False],
        'estimator__precompute': [True, False],
        'estimator__positive': [True, False],
        'estimator__max_iter': [10, 100, 1000, 10000],
        'estimator__selection': ['cyclic', 'random']
    }
    params_old = {
        'estimator__alpha': 0.1,
        'estimator__l1_ratio': 0.7,
        'estimator__fit_intercept': True,
        'estimator__normalize': True,
        'estimator__precompute': False,
        'estimator__positive': True,
        'estimator__max_iter': 11000,
        'estimator__selection': 'random'
    }
    params = {
        'estimator__alpha': 10,
        'estimator__l1_ratio': 1,
        'estimator__fit_intercept': True,
        'estimator__normalize': False,
        'estimator__precompute': True,
        'n_jobs': -1,
        'estimator__positive': True,
        #'estimator__max_iter': 10,
        'estimator__max_iter': 500,
        'estimator__selection': 'random'
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_en = MultiOutputRegressor(ElasticNet())
    multi_en.set_params(**params)
    #best_random = random_search(multi_en, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_en, rand_space, next_week, 3, X_train, Y_train)
    multi_en.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_en.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_en.predict(X_train)
        print(next_week)
        print("Score: ", multi_en.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_en,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week

def make_gb_pred(df, next_week, debug=0):
    """
    This method creates predictions using gradient boosting regression.
    """
    #Tuned##
    rand_space = {
        'estimator__alpha': [1e-6, 1e-5, 1e-4],
        'estimator__learning_rate': [0.4, 0.5, 0.6],
        'estimator__loss': ['ls', 'lad', 'huber', 'quantile'],
        'estimator__n_estimators': [500, 1000, 1500],
        'estimator__max_leaf_nodes': [50, 100, 200],
        'estimator__min_samples_split': [4, 5, 6],
        'estimator__min_samples_leaf': [5, 10, 50],
        'estimator__min_weight_fraction_leaf': [0.4, 0.5],
        'estimator__max_depth': [5, 10, 50],
        'estimator__max_features': ['auto', 'sqrt', None, 1, 5]
    }

    space = {
        'estimator__alpha': [0.6],
        'estimator__learning_rate': [0.5],
        'estimator__loss': ['ls'],
        'estimator__n_estimators': [1000],
        'estimator__max_leaf_nodes': [36, 37, 38, 39],
        'estimator__min_samples_split': [4],
        'estimator__min_samples_leaf': [10],
        'estimator__min_weight_fraction_leaf': [0.5],
        'estimator__max_depth': [14],
        'estimator__max_features': [1]
    }

    params_old = {
        'estimator__alpha': 0.6,
        'estimator__learning_rate': 0.5,
        'estimator__loss': 'ls',
        'estimator__n_estimators': 1000,
        'estimator__max_leaf_nodes': 38,
        'estimator__min_samples_split': 4,
        'estimator__min_samples_leaf': 10,
        'estimator__min_weight_fraction_leaf': 0.5,
        'estimator__max_depth': 14,
        'estimator__max_features': 1
    }
    params = {
        'estimator__learning_rate': 0.9,
        'estimator__loss': 'ls',
        'estimator__n_estimators': 1,
        'estimator__max_leaf_nodes': 50,
        'estimator__min_samples_split': 10,
        'estimator__min_samples_leaf': 5,
        'estimator__min_weight_fraction_leaf': 0.2,
        'n_jobs': -1,
        'estimator__max_depth': 10,
        'estimator__max_features': 5
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_gbr = MultiOutputRegressor(GradientBoostingRegressor())
    #best_random = random_search(multi_gbr, rand_space, next_week, 200, 3, X_train, Y_train)
    #best_random = grid_search(multi_gbr, space, next_week, 3, X_train, Y_train)
    multi_gbr.set_params(**params)
    multi_gbr.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_gbr.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_gbr.predict(X_train)
        print(next_week.to_string())
        print("Score: ", multi_gbr.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_gbr,
                               X_train,
                               Y_train,
                               cv=3,
                               scoring='neg_mean_squared_error'))
    return next_week

def make_knn_pred(df, next_week, debug=0):
    """
    This method creates predictions using k-nearest neighbors.
    """
    #Tuned##
    rand_space = {
        'estimator__n_neighbors': [5, 10, 15],
        'estimator__weights': ['uniform', 'distance'],
        'estimator__algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
        'estimator__leaf_size': [50, 100, 150, 200],
        'estimator__p': [1, 2, 3]
    }
    space = {
        'estimator__n_neighbors': [14, 15, 16],
        'estimator__weights': ['distance'],
        'estimator__algorithm': ['auto', 'brute'],
        'estimator__leaf_size': [50, 90, 100, 110, 150],
        'estimator__p': [1]
    }
    params_old = {
        'estimator__n_neighbors': 15,
        'estimator__weights': 'distance',
        'estimator__algorithm': 'brute',
        'estimator__leaf_size': 50,
        'estimator__p': 1
    }
    params = {
        'estimator__n_neighbors': 10,
        'estimator__weights': 'uniform',
        'estimator__algorithm': 'auto',
        'estimator__leaf_size': 1,
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__p': 1
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_knn = MultiOutputRegressor(neighbors.KNeighborsRegressor())
    multi_knn.set_params(**params)
    #best_random = random_search(multi_knn, rand_space, next_week, 100, 3, X_train, Y_train)
    #best_random = grid_search(multi_knn, space, next_week, 3, X_train, Y_train)
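    # KNeighborsRegressor raises a ValueError when n_neighbors exceeds the
    # number of training samples; the except branch below retries with a
    # neighbor count tied to the dataset size.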
    try:
        multi_knn.fit(X_train, Y_train)
        next_week[Y_train.columns] = multi_knn.predict(
            next_week[X_train.columns])
    except ValueError:
        # Retry with a neighbor count derived from the dataset size.
        params = {
            'estimator__n_neighbors': len(df.index) - 1,
            'estimator__weights': 'distance',
            'estimator__algorithm': 'brute',
            'estimator__leaf_size': 50,
            'estimator__p': 1
        }
        multi_knn.set_params(**params)
        multi_knn.fit(X_train, Y_train)
        next_week[Y_train.columns] = multi_knn.predict(
            next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_knn.predict(X_train)
        print(next_week)
        print("Score: ", multi_knn.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_knn,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week

def make_rf_pred(df, next_week, debug=0):
    """
    This method creates predictions using random forest.
    """
    #Tuned##
    params_old = {
        'estimator__bootstrap': True,
        'estimator__max_depth': 5,
        'estimator__max_features': 'sqrt',
        'estimator__random_state': 4,
        'estimator__min_samples_leaf': 9,
        'estimator__min_samples_split': 20,
        'estimator__n_estimators': 800
    }

    params = {
        'estimator__bootstrap': False,
        'estimator__max_depth': 3,
        'estimator__max_features': 'sqrt',
        'estimator__random_state': 4,
        'estimator__min_samples_leaf': 1,
        'estimator__min_samples_split': 2,
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__n_estimators': 200
    }
    rand_space = {
        'estimator__bootstrap': [True, False],
        'estimator__max_depth': [int(x) for x in np.linspace(10, 110, num=11)],
        'estimator__max_features': ['auto', 'sqrt'],
        'estimator__random_state': [4],
        'estimator__min_samples_leaf': [1, 2, 4, 8],  #132
        'estimator__min_samples_split': [2, 5, 10],  #396
        'estimator__n_estimators':
        [int(x) for x in np.linspace(200, 2000, num=10)]
    }  #3960
    space = {
        'estimator__bootstrap': [True],
        'estimator__max_depth': [5],
        'estimator__max_features': ['sqrt'],
        'estimator__random_state': [4],
        'estimator__min_samples_leaf': [9],  #132
        'estimator__min_samples_split': [15, 20, 25],  #396
        'estimator__n_estimators': [800]
    }  #3960
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_rf = MultiOutputRegressor(RandomForestRegressor())
    multi_rf.set_params(**params)
    #best_random = random_search(multi_rf, rand_space, next_week, 100, 3, X_train, Y_train),
    #best_random = grid_search(multi_rf, space, next_week, 3, X_train, Y_train)
    multi_rf.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_rf.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_rf.predict(X_train)
        print(next_week.to_string())
        print("Score: ", multi_rf.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_rf,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week

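# A minimal driver sketch (an assumption, not part of the original source):
# each make_*_pred helper mutates and returns next_week, so pass a fresh
# copy per model when comparing them. df and next_week are pandas
# DataFrames prepared elsewhere in the project.
models = {
    'svr': make_svr_pred,
    'linear': make_lr_pred,
    'lasso': make_lasso_pred,
    'elastic': make_elastic_pred,
    'gb': make_gb_pred,
    'knn': make_knn_pred,
    'rf': make_rf_pred,
}
for name, make_pred in models.items():
    pred = make_pred(df, next_week.copy(), debug=0)
    print(name)
    print(pred.head())
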
Example #8
import numpy as np

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor

import datahelper
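# datahelper is assumed to be a project-local module whose get_xy builds the
# feature matrix x and the multi-output target y from the files under data/.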

x, y = datahelper.get_xy('data/', num_hours=3, error_minutes=15)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

gradient_boost = GradientBoostingRegressor(learning_rate=0.1)
model = MultiOutputRegressor(estimator=gradient_boost, n_jobs=-1)

# Sweep n_estimators from 10 to 990 in steps of 10, recording the model's
# held-out score (R^2 for regressors) at each setting.
estimators = np.arange(10, 1000, 10)
scores = {}
for current_index, n in enumerate(estimators):
    model.set_params(
        estimator=GradientBoostingRegressor(n_estimators=n, learning_rate=0.1))
    model.fit(x_train, y_train)
    scores[current_index] = model.score(x_test, y_test)

# Rank sweep indices by held-out score, best first.
sorted_by_scores = [(k, scores[k])
                    for k in sorted(scores, key=scores.get, reverse=True)]

print('The 5 estimator counts with the best held-out scores:\n')

for i in range(5):
    index, score = sorted_by_scores[i]
    print("Number of estimators = ", estimators[index], " score = ", score)

    model.set_params(estimator=GradientBoostingRegressor(
        n_estimators=estimators[index], learning_rate=0.1))
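    # Assumed continuation (the original listing is truncated here): refit
    # with this estimator count and report its held-out score.
    model.fit(x_train, y_train)
    print("Test score = ", model.score(x_test, y_test))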