def Linear_Regression(df, df1, df2, name):
    '''
    Input: dataframe, string, string, string
    output: plot, numpy array

    Fits a LinearRegression model that predicts the 'user_rating_y' column
    from the 'user_rating_x' column of df.

    df   -- dataframe providing the 'user_rating_x' and 'user_rating_y' columns
    df1  -- text used as the x-axis label
    df2  -- text used as the y-axis label
    name -- text inserted into the plot title and the saved image file name

    The rows are split with test_size=.33 and a fixed random_state, the model
    is fitted on the training part, and the value returned by the project's
    rmsle helper for the held-out part is printed.  A scatter of the held-out
    points together with the predicted line is saved under images/ and then
    shown.  Returns the fitted model's coef_ array.
    '''
    # A single feature column must be 2-D for the estimator, hence reshape(-1, 1).
    features = df['user_rating_x'].values.reshape(-1, 1)
    target = df['user_rating_y'].values

    split = train_test_split(features, target, test_size=.33, random_state=10)
    X_train, X_test, y_train, y_test = split

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    predicted = regressor.predict(X_test)
    coefficients = regressor.coef_

    # The helper object is only needed here for its rmsle method.
    scorer = LR.linearReg(X_test, y_test)
    print(scorer.rmsle(y_test, predicted))

    # NOTE(review): x values are multiplied by 5 for display only --
    # presumably to map a normalised rating back onto a 5-point scale; confirm.
    scaled_x = X_test * 5
    plt.scatter(scaled_x, y_test)
    plt.plot(scaled_x, predicted)
    plt.ylabel(df2)
    plt.xlabel(df1)
    plt.title('Linear Regression for {}'.format(name))
    # Save before show(): the figure may be discarded once the window closes.
    plt.savefig('images/{}_linear_model'.format(name))
    plt.show()

    return coefficients
def Lasso_model(df, df1, df2, name):
    '''
    Input: dataframe, string, string, string
    output: plot, numpy array, float

    Fits a Lasso model that predicts the 'user_rating_y' column from the
    'user_rating_x' column of df, choosing the regularisation strength from
    a fixed list of candidates.

    df   -- dataframe providing the 'user_rating_x' and 'user_rating_y' columns
    df1  -- text used as the x-axis label
    df2  -- text used as the y-axis label
    name -- text inserted into the plot title and the saved image file name

    The candidates are handed to the project's linearReg.lasso helper; the
    row of its result with the smallest 'CVtest_mean_RMSLE' supplies the
    'lambda' value used as alpha for the final fit on the training part.
    The value returned by the rmsle helper for the held-out part is printed,
    the plot is saved under images/ and shown.  Returns the fitted model's
    coef_ array and the chosen alpha.
    '''
    features = df['user_rating_x'].values.reshape(-1, 1)
    target = df['user_rating_y'].values

    split = train_test_split(features, target, test_size=.33, random_state=10)
    X_train, X_test, y_train, y_test = split

    helper = LR.linearReg(X_train, y_train)
    candidate_alphas = [.00001, .0001, .001, .01, .1, 1, 10, 100]
    # NOTE(review): the second argument (5) is presumably the number of
    # cross-validation folds -- confirm against LR.linearReg.lasso.
    cv_table = helper.lasso(candidate_alphas, 5)
    best_row = cv_table['CVtest_mean_RMSLE'].idxmin()
    best_alpha = cv_table['lambda'][best_row]

    lasso_fit = Lasso(alpha=best_alpha)
    lasso_fit.fit(X_train, y_train)
    predicted = lasso_fit.predict(X_test)
    coefficients = lasso_fit.coef_

    print(helper.rmsle(y_test, predicted))

    # NOTE(review): x values are multiplied by 5 for display only --
    # presumably to map a normalised rating back onto a 5-point scale; confirm.
    scaled_x = X_test * 5
    plt.plot(scaled_x, predicted)
    plt.scatter(scaled_x, y_test)
    plt.ylabel(df2)
    plt.xlabel(df1)
    plt.title('Lasso Regression for {}'.format(name))
    # Save before show(): the figure may be discarded once the window closes.
    plt.savefig('images/{}_lasso_model'.format(name))
    plt.show()

    return coefficients, best_alpha
# Replace the categorical preferred_foot column with integer labels.
# X and y are expected to be defined earlier in the script.
label_encoder = LabelEncoder()
X.preferred_foot = label_encoder.fit_transform(X.preferred_foot)

# Hold out 10% of the rows for testing; the seed is fixed for repeatability.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=0
)

# Look for the most significant features using backward elimination.
print('-'*50)
import backwardEle
backwardEle.backwardEle(X_train, y_train)
print('-'*50)

# Linear regression: the project helper returns a score that is reported
# as a percentage.
import linearReg
linearReg_score = linearReg.linearReg(X_train, X_test, y_train, y_test)
print(
    "Pridicting based on Simple Linear regression we get the score as : {:.5}%".format(
        linearReg_score*100
    )
)
print('-'*50)

# XGBoost: same calling convention and reporting as the linear model above.
import xgb
reg_xg_score = xgb.xgb(X_train, X_test, y_train, y_test)
print(
    "Pridicting after applying XGBoost we get the score as : {:.5}%".format(
        reg_xg_score*100
    )
)
print('-'*50)

# Decision Tree
# Fitting Decision Tree Regression to the Training set
import decisionTreeReg
#!/usr/bin/python
"""Fit the project's linear regression on age / net-worth data and report it.

The script loads the train/test split from ``ageNetWorthData``, fits the
regression returned by ``linearReg`` on the training part, hands the result to
``plot_regression`` and prints one prediction, the slope, the intercept and
the score on both the test and the training data.
"""
import sys

import numpy
import matplotlib

# Select the non-interactive Agg backend BEFORE pyplot, or any helper module
# that may import pyplot, is loaded.  Older Matplotlib releases ignore (with a
# warning) a use() call made after the backend has already been chosen, which
# can make the script fail on machines without a display.
matplotlib.use('agg')

import matplotlib.pyplot as plt  # noqa: E402
from linearReg import linearReg  # noqa: E402

# The shared helpers live outside this directory, so extend the search path
# before importing them.
sys.path.append("../../common")
from class_vis import plot_regression  # noqa
from ages_net_worths import ageNetWorthData  # noqa

ages_train, ages_test, net_worths_train, net_worths_test = ageNetWorthData()

reg = linearReg(ages_train, net_worths_train)

plot_regression(reg, ages_train, ages_test, net_worths_train, net_worths_test)

# NOTE(review): the [0][0] / [0] indexing assumes the model was fitted on a
# 2-D target, so predict() and coef_ are 2-D and intercept_ is 1-D -- confirm
# against ageNetWorthData.
print("Net worth prediction:", reg.predict([[29]])[0][0])
print("Slope", reg.coef_[0][0])
print("Intercept", reg.intercept_[0])

print("Test r-squared:", reg.score(ages_test, net_worths_test))
print("Train r-squared:", reg.score(ages_train, net_worths_train))