# Split Training vs. Testing Set X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8) # Feature Scaling (optional) #X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test)) #Y_train, Y_test = pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test)) # In[] Create Linear Regressor from HappyML.regression import SimpleRegressor simple_reg = SimpleRegressor() Y_pred_simple = simple_reg.fit(X_train, Y_train).predict(X_test) # R-Squared always increase in multiple linear regression --> Use Adjusted R-Squared instead print("Goodness of Model (R-Squared Score):", simple_reg.r_score(X_test, Y_test)) # In[] Multiple Linear Regression ## Add one column X0 for constant C0 #import statsmodels.tools.tools as smtools #X_train = smtools.add_constant(X_train) # ## Try-and-Error of Selecting Features with Backward Elimination #import statsmodels.api as sm # #features = [0, 1, 2, 3, 4, 5] ##features = [0, 1, 2, 3, 5] ##features = [0, 1, 2, 3] ##features = [0, 1, 3] ##features = [0, 1]
# NOTE(review): this chunk was collapsed onto a single physical line; newlines
# restored from the embedded comment / "# In[]" cell markers. The first
# statement below is a continuation of a call (presumably
# pp.split_train_test(...)) whose opening lies before this chunk — left
# byte-identical; verify against the preceding part of the file.
y_ary=Y, train_size=0.8)

# Feature Scaling
#X = pp.feature_scaling(fit_ary=X, transform_arys=(X))
#Y = pp.feature_scaling(fit_ary=Y, transform_arys=(Y))

# In[] Linear Regression as comparison
from HappyML.regression import SimpleRegressor
import HappyML.model_drawer as md

# Fit and predict on the SAME data (no hold-out) — this is a baseline for
# visual comparison against the polynomial model below, not an evaluation.
reg_simple = SimpleRegressor()
Y_simple = reg_simple.fit(x_train=X, y_train=Y).predict(x_test=X)

# Scatter of the raw samples overlaid with the fitted line.
md.sample_model(sample_data=(X, Y), model_data=(X, Y_simple))
print("R-Squared of Simple Regression:", reg_simple.r_score(x_test=X, y_test=Y))

# In[] Polynomial Regression
#from sklearn.preprocessing import PolynomialFeatures
#from HappyML.performance import rmse
#import pandas as pd
#
#deg=5
#poly_reg = PolynomialFeatures(degree=deg)
#X_poly = pd.DataFrame(poly_reg.fit_transform(X))
#
#regressor = SimpleRegressor()
#regressor.fit(X_poly, Y)
#Y_predict = regressor.predict(x_test=X_poly)
#
#md.sample_model(sample_data=(X, Y), model_data=(X, Y_predict))
# NOTE(review): this chunk was collapsed onto a single physical line; newlines
# restored from the embedded "# In[]" cell markers. Assumes `pp`, `X`, `Y`
# are defined earlier in the file — TODO confirm.

# One-hot encode the categorical column at index 3; remove_trap=True drops one
# dummy column to avoid the dummy-variable trap.
X = pp.onehot_encoder(X, columns=[3], remove_trap=True)
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8)

# Scale features and target; the scaler is fit on the training split only and
# applied to both splits.
X_train, X_test = pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))
Y_train, Y_test = pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test))

# In[]
from HappyML.regression import SimpleRegressor

# Baseline: plain linear regression on all features.
simple_reg = SimpleRegressor()
Y_pred_simple = simple_reg.fit(X_train, Y_train).predict(X_test)

# R-Squared always increase in multiple linear regression --> Use Adjusted R-Squared instead
print("Goodness of Model (R-Squared Score):", simple_reg.r_score(X_test, Y_test))

# In[]
from HappyML.regression import MultipleRegressor

# Add the constant (intercept) column X0 required by the statsmodels-style
# backward elimination inside MultipleRegressor.
X_train = pp.add_constant(X_train)
X_test = pp.add_constant(X_test)

regressor = MultipleRegressor()

# Automatic feature selection: returns the column indices that survive
# backward elimination; fit/predict then use only those columns.
selected_features = regressor.backward_elimination(x_train=X_train, y_train=Y_train)
Y_predict = regressor.fit(x_train=X_train.iloc[:, selected_features], y_train=Y_train).predict(x_test=X_test.iloc[:, selected_features])

print("Goodness of Model (R-Squared Score):", regressor.r_score())

# In[]
# NOTE(review): this chunk was collapsed onto a single physical line; newlines
# restored from the embedded comment / "# In[]" cell markers. The first
# statement below is the tail of a call (presumably
# pp.feature_scaling(fit_ary=Y_train, ...)) whose opening lies before this
# chunk — left byte-identical; verify against the preceding part of the file.
transform_arys=(Y_train, Y_test))

# In[] Fitting Simple Regressor
# Equivalent scikit-learn version, kept for reference:
# from sklearn.linear_model import LinearRegression
# regressor = LinearRegression()
# regressor.fit(X_train, Y_train)
# Y_pred = regressor.predict(X_test)
# R_Score = regressor.score(X_test, Y_test)

from HappyML.regression import SimpleRegressor

# Fit on the training split, predict the test split (fluent chain: fit
# returns the regressor).
regressor = SimpleRegressor()
Y_pred = regressor.fit(X_train, Y_train).predict(X_test)
print("R-Squared Score:", regressor.r_score(X_test, Y_test))

# In[] Visualize the Training Set
# Raw matplotlib version, kept for reference:
#import matplotlib.pyplot as plt
#
#plt.scatter(X_train, Y_train, color="red")
#plt.plot(X_train, regressor.predict(X_train), color="blue")
#plt.title("Salary vs. Experience")
#plt.xlabel("Experience")
#plt.ylabel("Salary")
#plt.show()

from HappyML import model_drawer as md

# Pair the training samples with the fitted line's predictions for drawing.
sample_data = (X_train, Y_train)
model_data = (X_train, regressor.predict(X_train))