示例#1
0
def MELHOR_RESULTADO_MG():
    """Refit and report the recorded gradient-boosting setup for target column 4.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 4 as
    target), holds out 20% of the rows with ``random_state=9`` and runs a
    10-fold ``GridSearchCV`` scored by R2. The grid holds a single value per
    hyper-parameter, so the search only cross-validates that one
    configuration. The cross-validated score and the parameters are printed,
    then the refit estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.73465, seed: 9
        Best params: {'learning_rate': 0.05, 'max_depth': 50,
            'max_features': 'log2', 'min_samples_leaf': 11,
            'n_estimators': 60, 'random_state': 0}
        Best score: 0.7346493872397788
        Best seed: 9
        R2 test:  0.7641468991808511  MSE test:  0.0037531633086605658
        R2 train: 0.8748115698686528  MSE train: 0.003076693813010081
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 4]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'learning_rate': [0.05],
        'max_depth': [50],
        'max_features': ['log2'],
        'min_samples_leaf': [11],
        'n_estimators': [60]
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=9)
    model = GradientBoostingRegressor(random_state=0)
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (1) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 1, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#2
0
def MELHOR_RESULTADO_NA():
    """Refit and report the recorded gradient-boosting setup for target column 5.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 5 as
    target), holds out 20% of the rows with ``random_state=1`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. Only
    the cross-validated score is printed (the parameters are not), then the
    refit estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.73448, seed: 9
        Best params: {'learning_rate': 0.05, 'max_depth': 10,
            'max_features': 2, 'min_samples_leaf': 5, 'n_estimators': 50,
            'random_state': 0}
        Best score: 0.734480868500437

    NOTE(review): the recorded run does not match the code below (max_depth
    10 vs 20, max_features 2 vs 'log2', seed 9 vs 1) - it presumably comes
    from a different configuration; verify before relying on these numbers.
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 5]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'learning_rate': [0.05],
        'max_depth': [20],
        'max_features': ['log2'],
        'min_samples_leaf': [5],
        'n_estimators': [50],
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=1)
    model = GradientBoostingRegressor(random_state=0)
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)

    # NOTE(review): the meaning of the second argument (1) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 1, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#3
0
def MELHOR_RESULTADO_K():
    """Refit and report the recorded random-forest setup for target column 6.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 6 as
    target), holds out 20% of the rows with ``random_state=3`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score and the parameters are printed, then the refit
    estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 6]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'bootstrap': [True],
        'max_depth': [10],
        'max_features': ['log2'],
        'min_samples_leaf': [2],
        'n_estimators': [60]
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=3)
    model = RandomForestRegressor(random_state=0)
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (0) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 0, X_train, X_test, y_train, y_test)
示例#4
0
def MELHOR_RESULTADO_NA():
    """Refit and report the recorded random-forest setup for target column 5.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 5 as
    target), holds out 20% of the rows with ``random_state=2`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score and the parameters are printed, then the refit
    estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.61644, seed: 2
        Best params: {'bootstrap': True, 'max_depth': 10,
            'max_features': 'log2', 'min_samples_leaf': 2,
            'n_estimators': 100}
        Best score: 0.6164423932855161
        Best seed: 2
        R2 test:  0.6470743582563896  MSE test:  0.011315230193482718
        R2 train: 0.8844389141031836  MSE train: 0.002520229138039719
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 5]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'bootstrap': [True],
        'max_depth': [10],
        'max_features': ['log2'],
        'min_samples_leaf': [2],
        'n_estimators': [100]
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=2)
    model = RandomForestRegressor(random_state=0)
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (0) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 0, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#5
0
def MELHOR_RESULTADO_MG():
    """Refit and report the recorded random-forest setup for target column 4.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 4 as
    target), holds out 20% of the rows with ``random_state=9`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score and the parameters are printed, then the refit
    estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.73366, seed: 9
        Best params: {'bootstrap': True, 'max_depth': 20,
            'max_features': 'auto', 'min_samples_leaf': 4,
            'n_estimators': 60}
        Best score: 0.7336590449066758
        Best seed: 9
        R2 test:  0.7158820898963456  MSE test:  0.00452120795457348
        R2 train: 0.8717629515241512  MSE train: 0.0031516181905177796
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 4]
    # One value per key: the hyper-parameters are pinned, not searched.
    # NOTE(review): max_features='auto' was deprecated for forests in
    # scikit-learn 1.1 and removed in 1.3; left untouched here so the printed
    # best_params_ still match the recorded run - revisit when upgrading.
    param = {
        'bootstrap': [True],
        'max_depth': [20],
        'max_features': ['auto'],
        'min_samples_leaf': [4],
        'n_estimators': [60]
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=9)
    model = RandomForestRegressor(random_state=0)
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (3) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#6
0
def MELHOR_RESULTADO_K():
    """Refit and report the recorded k-nearest-neighbours setup for target column 6.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 6 as
    target), holds out 20% of the rows with ``random_state=4`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score and the parameters are printed, then the refit
    estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.67596, seed: 4
        Best params: {'algorithm': 'brute', 'n_neighbors': 8,
            'weights': 'distance'}
        Best score: 0.6759638656720487
        Best seed: 4
        R2 test:  0.8262489727826184  MSE test:  0.0030635365836426814
        R2 train: 0.9999999999999659  MSE train: 8.640127461660114e-16
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 6]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'algorithm': ['brute'],
        'n_neighbors': [8],
        'weights': ['distance'],
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=4)
    model = KNeighborsRegressor()
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (3) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#7
0
def MELHOR_RESULTADO_NA():
    """Refit and report the recorded k-nearest-neighbours setup for target column 5.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 5 as
    target), holds out 20% of the rows with ``random_state=9`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score and the parameters are printed, then the refit
    estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.62096, seed: 9
        Best params: {'algorithm': 'brute', 'n_neighbors': 6,
            'weights': 'distance'}
        Best score: 0.6209552829789358
        Best seed: 9
        R2 test:  0.7066316533521843  MSE test:  0.009787010608152074
        R2 train: 0.9999999999999105  MSE train: 1.9219260943726335e-15
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 5]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'algorithm': ['brute'],
        'n_neighbors': [6],
        'weights': ['distance'],
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=9)
    model = KNeighborsRegressor()
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (3) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#8
0
def MELHOR_RESULTADO_MG():
    """Refit and report the recorded k-nearest-neighbours setup for target column 4.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 4 as
    target), holds out 20% of the rows with ``random_state=5`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score and the parameters are printed, then the refit
    estimator and the split are handed to ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.69719, seed: 4
        Best params: {'algorithm': 'auto', 'n_neighbors': 6,
            'weights': 'distance'}
        Best score: 0.6971875382557432
        Best seed: 4
        R2 test:  0.8477948730251221  MSE test:  0.0026071842742281586
        R2 train: 1.0  MSE train: 0.0

    NOTE(review): the recorded run names seed 4 while the split below uses
    random_state=5 - confirm which seed is intended.
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 4]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'algorithm': ['auto'],
        'n_neighbors': [6],
        'weights': ['distance'],
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=5)
    model = KNeighborsRegressor()
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print("Cross Validation  R2 Score :", grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (3) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 3, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.
示例#9
0
# Top-level driver: picks a model/seed via findBalancedDataSet, prints the
# outcome, then re-creates the matching train/test split and plots it.
# Relies on a module-level `dataSet` being defined before this point.
#dataSet = loadMainDataSet()
# Set features and target: columns 0-1 are the features, column 2 the target.
# NOTE(review): y_column is assigned but not used in the lines visible here.
y_column = 2 
X = dataSet[:,0:2]
y = dataSet[:,2]


# findBalancedDataSet receives the candidate range 1..9, the data and the
# search routine, and returns (model, params, score, seed).
# NOTE(review): presumably the range is the set of split seeds to try -
# confirm against findBalancedDataSet, which is not visible here.
best_model , best_params, best_score,best_seed = findBalancedDataSet(range(1,10),X,y,GridSearchCVKNeighborsRegressor)

print("#Best Params",best_params)
print("#Best Score",best_score)
print("#Best Seed",best_seed)


# Rebuild the 80/20 split with the returned seed so the plot uses the same
# random_state that was reported above.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state=best_seed)
# The second argument is the index of the last feature column.
pltResults(best_model,X.shape[1]-1,X_train,X_test,y_train,y_test)

########## MG #################

# Recorded run:
# Finished - best score: 0.69719, seed: 4
# Best params: {'algorithm': 'auto', 'n_neighbors': 6, 'weights': 'distance'}
# Best score: 0.6971875382557432
# Best seed: 4
# R2 test:  0.8477948730251221  MSE test:  0.0026071842742281586
# R2 train:  1.0  MSE train:  0.0


def MELHOR_RESULTADO_MG():
    """Select features, target and a one-candidate parameter grid for column 4.

    NOTE(review): as visible here the body stops after building ``param`` -
    nothing is fitted, printed or returned. The definition looks truncated;
    confirm against the full source.
    """
    # Columns 0-3 of the module-level dataSet are the features.
    X = dataSet[:,0:4]
    # Column 4 is the regression target.
    y = dataSet[:,4]
    # One value per key, so a grid search over this has a single candidate.
    param ={'algorithm': ['auto'], 'n_neighbors': [6], 'weights': ['distance']}
示例#10
0
def MELHOR_RESULTADO_K():
    """Refit and report the recorded gradient-boosting setup for target column 6.

    Reads the module-level ``dataSet`` (columns 0-3 as features, column 6 as
    target), holds out 20% of the rows with ``random_state=3`` and runs a
    10-fold ``GridSearchCV`` scored by R2 over a single-candidate grid. The
    cross-validated score (printed without a label) and the parameters are
    printed, then the refit estimator and the split are handed to
    ``pltResults``.

    Returns:
        None. All output goes through ``print`` and ``pltResults``.

    Recorded reference run (kept from the original comments):
        Finished - best score: 0.63825, seed: 3
        Best params: {'learning_rate': 0.05, 'max_depth': 30,
            'max_features': 'log2', 'min_samples_leaf': 4,
            'n_estimators': 90, 'random_state': 0}
        Best score: 0.6382454854124988
        R2 test:  0.6772838242726009  MSE test:  0.0078083038481006
        R2 train: 0.9892885578561049  MSE train: 0.00025464662511601085
    """
    X = dataSet[:, 0:4]
    y = dataSet[:, 6]
    # One value per key: the hyper-parameters are pinned, not searched.
    param = {
        'learning_rate': [0.05],
        'max_depth': [30],
        'max_features': ['log2'],
        'min_samples_leaf': [4],
        'n_estimators': [90],
    }
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=3)
    model = GradientBoostingRegressor(random_state=0)
    # `iid` is deliberately not passed: it was deprecated in scikit-learn 0.22
    # and removed in 0.24, where supplying it raises TypeError. With a single
    # candidate it could only influence the reported best_score_.
    grid = GridSearchCV(model,
                        param,
                        cv=10,
                        verbose=0,
                        n_jobs=-1,
                        scoring='r2')
    grid.fit(X_train, y_train)
    best_model = grid.best_estimator_
    print(grid.best_score_)
    print(grid.best_params_)
    # NOTE(review): the meaning of the second argument (0) is defined by
    # pltResults, which is not visible here - confirm against its definition.
    pltResults(best_model, 0, X_train, X_test, y_train, y_test)
    # The function used to end by calling itself unconditionally, which
    # recursed without bound; invoke it explicitly from the caller instead.


############### MG ##########################
#Conluido - Best Score: 0.65662 Semente: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 7, 'max_features': 'auto', 'min_samples_leaf': 5, 'n_estimators': 19, 'random_state': 0}
#Best Score 0.6566162097362611
#Best Seed 9
#R2 Test:  0.6698401069997604  MSE Test:  0.005253880453960908
#R2 Train:  0.7738415101819238  MSE Train:  0.0055581847751659365

#Conluido - Best Score: 0.73203 Semente: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.7320304172385002
#Best Seed 9
#R2 Test:  0.6928402471363506  MSE Test:  0.004887876014100292
#R2 Train:  0.9412048892640388  MSE Train:  0.0014449782080243406

#Conluido - Best Score: 0.73465 Semente: 9
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 11, 'n_estimators': 60, 'random_state': 0}
#Best Score 0.7346493872397788
#Best Seed 9
#R2 Test:  0.7641468991808511  MSE Test:  0.0037531633086605658
#R2 Train:  0.8748115698686528  MSE Train:  0.003076693813010081

############### Na ##########################

#Conluido - Best Score: 0.59157 Semente: 2
#Best Params {'learning_rate': 0.05, 'max_depth': 50, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.5915734707430875
#Best Seed 2
#R2 Test:  0.6002369455399681  MSE Test:  0.012816895257928434
#R2 Train:  0.9122383051241635  MSE Train:  0.0019139624633444869

#Conluido - Best Score: 0.59157 Semente: 2
#Best Params {'learning_rate': 0.05, 'max_depth': 20, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.5915734707430875
#Best Seed 2
#R2 Test:  0.6002369455399681  MSE Test:  0.012816895257928434
#R2 Train:  0.9122383051241635  MSE Train:  0.0019139624633444869

#Conluido - Best Score: 0.59717 Semente: 2
#Best Params {'learning_rate': 0.05, 'max_depth': 5, 'max_features': 'log2', 'min_samples_leaf': 5, 'n_estimators': 50, 'random_state': 0}
#Best Score 0.5971743288694583
#Best Seed 2
#R2 Test:  0.6153244736541088  MSE Test:  0.012333170548047006
#R2 Train:  0.8709102998267071  MSE Train:  0.002815269701498251

############### K #############################

#Conluido - Best Score: 0.63825 Semente: 3
#Best Params {'learning_rate': 0.05, 'max_depth': 30, 'max_features': 'log2', 'min_samples_leaf': 4, 'n_estimators': 90, 'random_state': 0}
#Best Score 0.6382454854124988
#Best Seed 3
#R2 Test:  0.6772838242726009  MSE Test:  0.0078083038481006
#R2 Train:  0.9892885578561049  MSE Train:  0.00025464662511601085