# NOTE: this snippet begins mid-statement; the opening of the call below is
# reconstructed from context (fit on the selected features, then predict).
Y_predict = regressor.fit(x_train=X_train.iloc[:, selected_features],
                          y_train=Y_train).predict(x_test=X_test.iloc[:, selected_features])

print("Goodness of Model (Adjusted R-Squared Score):", regressor.r_score())

# In[] Use the Mean Squared Error (MSE) to compare the two models; the smaller MSE wins.
# Definition: MSE = (1/n) * SUM((Y_i - Yhat_i)^2)
# Some take the square root of the MSE, giving the Root Mean Squared Error (RMSE),
# before comparing. (A NumPy sketch of this formula follows this cell.)
from HappyML.performance import rmse

rmse_linear = rmse(Y_test, Y_pred_simple)
rmse_multi = rmse(Y_test, Y_predict)

if rmse_linear < rmse_multi:
    print(
        "RMSE Linear:{:.4f} < RMSE Multi-Linear:{:.4f}...Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Multi-Linear:{:.4f}...Multi-Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))
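# In[] A minimal NumPy sketch of the RMSE formula above, assuming HappyML's
# rmse() computes the same quantity (the toy arrays below are illustrative only).
import numpy as np

def rmse_sketch(y_true, y_pred):
    """Root Mean Squared Error: sqrt(mean((y_true - y_pred)^2))."""
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

print(rmse_sketch([1.0, 2.0, 3.0], [1.0, 2.0, 3.0]))  # perfect fit -> 0.0
print(rmse_sketch([1.0, 2.0, 3.0], [2.0, 2.0, 4.0]))  # sqrt(2/3) ~ 0.8165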

# In[] Check the Assumptions of Regression
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(X_train.iloc[:, selected_features],
                            X_test.iloc[:, selected_features], Y_train, Y_test,
                            Y_predict)
checker.y_lim = (-4, 4)
checker.heatmap = True
checker.check_all()
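# In[] check_all() is used as a one-shot above. For reference, the individual
# checks that appear elsewhere in this file can also be run one at a time;
# whether check_all() performs exactly these three (and nothing more) is an
# assumption, not documented behavior.
checker.sample_linearity()                  # each feature should relate linearly to the target
checker.residuals_normality()               # residuals should be roughly normally distributed
checker.features_correlation(heatmap=True)  # features should not be strongly correlated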
# Example No. 2
# NOTE: work_years and reg_poly come from earlier in this example (not shown in the snippet).
request_salary = float(input("Enter the candidate's requested monthly salary: "))
salary_pred = reg_poly.predict(pd.DataFrame([[work_years]])).iloc[0, 0]
salary_lower = reg_poly.predict(pd.DataFrame([[math.floor(work_years)]])).iloc[0, 0]
salary_upper = reg_poly.predict(pd.DataFrame([[math.ceil(work_years)]])).iloc[0, 0]
print("A reasonable monthly salary is around {:.2f}".format(salary_pred))
print("The expected range is {:.2f}~{:.2f}".format(salary_lower, salary_upper))

# In[] Comparing the two models
from HappyML.performance import rmse

rmse_linear = rmse(Y, Y_simple)
rmse_poly = rmse(Y, Y_poly)

if rmse_linear < rmse_poly:
    print(
        "RMSE Linear:{:.4f} < RMSE Polynomial:{:.4f}...Linear smaller, WIN!!".
        format(rmse_linear, rmse_poly))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Polynomial:{:.4f}...Polynomial smaller, WIN!!"
        .format(rmse_linear, rmse_poly))

# In[] Check the Assumptions of Linear Regression

from HappyML.criteria import AssumptionChecker

# The full dataset serves as both the "train" and "test" arguments here,
# since this example fits the polynomial model on all samples without a split.
checker = AssumptionChecker(X, X, Y, Y, Y_poly)
checker.check_all()
# Example No. 3
                title="訓練集樣本點 vs. 預測模型",
                font="DFKai-sb")
md.sample_model(sample_data=(X_test, Y_test),
                model_data=(X_test, Y_pred),
                title="測試集樣本點 vs. 預測模型",
                font="DFKai-sb")

# In[] Test for Linearity of Features
#from HappyML import model_drawer as md
#
#for i in range(X_train.shape[1]):
#    md.sample_model(sample_data=(X_train[:, i], Y_train), model_data=None, title="Linearity of Column {}".format(i))

from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(X_train, X_test, Y_train, Y_test, Y_pred)
checker.sample_linearity()

# In[] Test for Normal Distribution of Residuals
#import scipy.stats as stats
#import matplotlib.pyplot as plt
#
#residuals = (Y_test - Y_pred).ravel()
#stats.probplot(residuals, plot=plt)
#plt.show()

checker.residuals_normality()

# In[] Check for Uncorrelated Errors (Durbin-Watson)
#from statsmodels.stats.stattools import durbin_watson
#
import statsmodels.tools as smtools  # provides add_constant(); the snippet omits this import

X_test = smtools.add_constant(X_test)

X_opttest = X_test.iloc[:, features]
Y_predmulti = regressor_OLS.predict(X_opttest)
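# In[] The cell above is titled as an uncorrelated-errors check but stops at the
# predictions. A minimal sketch of the Durbin-Watson statistic itself (values
# near 2 suggest uncorrelated residuals; values near 0 or 4 suggest positive or
# negative autocorrelation):
from statsmodels.stats.stattools import durbin_watson
import numpy as np

residuals = np.asarray(Y_test).ravel() - np.asarray(Y_predmulti).ravel()
print("Durbin-Watson statistic: {:.4f}".format(durbin_watson(residuals)))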

# In[]
from sklearn.metrics import mean_squared_error
import numpy as np

rmse_linear = np.sqrt(mean_squared_error(Y_test, Y_pred_simple))
rmse_multi = np.sqrt(mean_squared_error(Y_test, Y_predmulti))

if rmse_linear < rmse_multi:
    print(
        "RMSE Linear:{:.4f} < RMSE Multi-Linear:{:.4f}...Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))
else:
    print(
        "RMSE Linear:{:.4f} > RMSE Multi-Linear:{:.4f}...Multi-Linear smaller, WIN!!"
        .format(rmse_linear, rmse_multi))

# In[]
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(X_train.iloc[:, features], X_test.iloc[:, features],
                            Y_train, Y_test, Y_predmulti)
checker.y_lim = (-4, 4)
checker.heatmap = True
checker.check_all()
# In[] K-Fold Cross-Validation
#recalls = cross_val_score(estimator=classifier.classifier, X=X, y=Y.values.ravel(), scoring="recall", cv=k_fold, n_jobs=-1)
#print("{} Folds Mean Recall: {}".format(k_fold, recalls.mean()))
#
#precisions = cross_val_score(estimator=classifier.classifier, X=X, y=Y.values.ravel(), scoring="precision", cv=k_fold, n_jobs=-1)
#print("{} Folds Mean Precision: {}".format(k_fold, precisions.mean()))
#
#f_scores = cross_val_score(estimator=classifier.classifier, X=X, y=Y.values.ravel(), scoring="f1", cv=k_fold, n_jobs=-1)
#print("{} Folds Mean F1-Score: {}".format(k_fold, f_scores.mean()))

from HappyML.performance import KFoldClassificationPerformance

K = 10
kfp = KFoldClassificationPerformance(x_ary=X, y_ary=Y, classifier=classifier.classifier, k_fold=K, verbose=False)

print("{} Folds Mean Accuracy: {}".format(K, kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, kfp.recall()))
print("{} Folds Mean Precision: {}".format(K, kfp.precision()))
print("{} Folds Mean F1-Score: {}".format(K, kfp.f_score()))

# In[] Visualization
import HappyML.model_drawer as md

md.classify_result(x=X_train, y=Y_train, classifier=classifier, title="Training Set vs. Model", font='DFKai-sb')
md.classify_result(x=X_test, y=Y_test, classifier=classifier, title="Test Set vs. Model", font='DFKai-sb')

# In[] Check for Variables Independence
from HappyML.criteria import AssumptionChecker

checker = AssumptionChecker(x_train=X_train, x_test=X_test, y_train=Y_train, y_test=Y_test, y_pred=Y_pred)
checker.features_correlation(heatmap=True)
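# In[] A rough pandas/matplotlib equivalent of the correlation check above (an
# assumption about what features_correlation() inspects: the pairwise correlation
# matrix of the features, where strong off-diagonal values signal dependence).
import pandas as pd
import matplotlib.pyplot as plt

corr = pd.DataFrame(X_train).corr()
plt.imshow(corr, cmap="coolwarm", vmin=-1, vmax=1)
plt.colorbar(label="correlation")
plt.title("Pairwise feature correlation")
plt.show()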
# Example No. 6
X_train, X_test, Y_train, Y_test = pp.split_train_test(x_ary=X, y_ary=Y)

X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))

classifier = NaiveBayesClassifier()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

K = 10
Kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("{} Folds Mean Accuracy: {}".format(K, Kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, Kfp.recall()))
print("{} Folds Mean Precision: {}".format(K, Kfp.precision()))
print("{} Folds Mean F1-Score: {}".format(K, Kfp.f_score()))

checker = AssumptionChecker(X_train, X_test, Y_train, Y_test, Y_pred)
checker.features_correlation()

selector = pp.KBestSelector(best_k=2)
X = selector.fit(X, Y, False, True).transform(X)  # fit on the full data, then keep the best_k features

X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))

classifier = NaiveBayesClassifier()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

md.classify_result(x=X_train, y=Y_train, classifier=classifier, title="Training Set vs. Model", font="Microsoft JhengHei")
md.classify_result(x=X_test, y=Y_test, classifier=classifier, title="Test Set vs. Model", font="Microsoft JhengHei")