def polynomial(power=5, random_state=9):
    """Fit a polynomial regression on the columns most correlated with SalePrice.

    Reads the module-level ``data_set`` frame, keeps the four columns that
    rank highest in its ``SalePrice`` correlation column together with
    ``SalePrice`` itself, splits that subset into train/test partitions and
    fits a ``PolynomialFeatures`` -> ``LinearRegression`` pipeline on the
    training partition.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        random_state: Seed handed to ``train_test_split``.

    Returns:
        The fitted pipeline.
    """

    # Select the numerical features that have the highest correlation
    # with the target.
    def Max_important_feature(data_set, target_variable='SalePrice', n=4):
        ranked = data_set.corr().nlargest(n=(n + 1), columns=target_variable)
        # The first label is dropped: presumably the target itself, which
        # correlates perfectly with itself -- confirm for other targets.
        return list(ranked[target_variable].index[1:])

    def load_data(df, test_size=0.33, random_state=random_state):
        # Every column except the last is a feature; the caller appends
        # 'SalePrice' last, so it is excluded from X.
        X = df.iloc[:, :-1]
        y = df['SalePrice']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=random_state, test_size=test_size)
        return df, X_train, X_test, y_train, y_test

    col = Max_important_feature(data_set, target_variable='SalePrice', n=4)
    # Take an explicit copy so that adding the target column writes to an
    # independent frame instead of to a slice of ``data_set`` (which makes
    # pandas emit its chained-assignment warning).
    df = data_set[col].copy()
    df['SalePrice'] = data_set['SalePrice']
    df, X_train, X_test, y_train, y_test = load_data(
        df, test_size=0.33, random_state=random_state)

    pipeline = make_pipeline(
        PolynomialFeatures(degree=power, include_bias=False),
        LinearRegression())
    model = pipeline.fit(X_train, y_train)
    return model
def test_Max_important_feature_result_values(self):
    # Checks that Max_important_feature, called with its defaults on the
    # loaded data set, returns exactly these four column names in this order.
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv')
    arr = Max_important_feature(data_set)
    expected_list = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']

    # Return value tests
    # The message is a single literal: the former two adjacent literals
    # concatenated without a space ("...the givenoutput").
    self.assertListEqual(
        list(arr), expected_list,
        "Expected output does not match the given output")
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Loads the house-prices data via ``load_data``, keeps the columns
    ``OverallQual``, ``GrLivArea``, ``GarageCars`` and ``GarageArea`` of the
    training features and fits a ``PolynomialFeatures`` ->
    ``LinearRegression`` pipeline against the training target.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        Random_state: Forwarded to ``load_data`` as ``random_state``
            (presumably the seed of its train/test split -- confirm against
            ``load_data``). It used to be accepted but never used.

    Returns:
        The fitted pipeline.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=Random_state)

    X = X_train.loc[
        :, ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']]

    higher_polynomial = make_pipeline(
        PolynomialFeatures(power, include_bias=False), LinearRegression())
    Model_0 = higher_polynomial.fit(X, y_train)
    return Model_0
def polynomial(power=5, random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Loads the house-prices data via ``load_data``, selects the columns
    ``OverallQual``, ``GrLivArea``, ``GarageCars`` and ``GarageArea`` from
    the training features and fits a ``PolynomialFeatures`` ->
    ``LinearRegression`` pipeline. The target is passed as a single-column
    2-D array, as before.

    Args:
        power: Degree handed to ``PolynomialFeatures`` (previously ignored
            in favour of a hard-coded 5).
        random_state: Forwarded to ``load_data`` as ``random_state``
            (presumably the seed of its train/test split -- confirm against
            ``load_data``). It used to be accepted but never used.

    Returns:
        The fitted pipeline.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=random_state)
    cols = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']

    poly_model = make_pipeline(
        PolynomialFeatures(power, include_bias=False), LinearRegression())

    # Convert through NumPy before reshaping: y_train may be a pandas Series
    # (TODO confirm against load_data), and Series offers no ``reshape`` in
    # current pandas. np.asarray is a no-op for an ndarray input.
    target = np.asarray(y_train).reshape(-1, 1)
    poly_model.fit(X_train[cols], target)
    return poly_model
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on the columns best correlated with SalePrice.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        Random_state: Forwarded to ``load_data`` as ``random_state``.

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=Random_state)

    # Column labels ordered by descending correlation with SalePrice.
    by_correlation = data_set.corr()['SalePrice'].sort_values(ascending=False)
    ranked_labels = np.array(by_correlation.index)
    # Positions 1..4 are used; position 0 is skipped (presumably SalePrice
    # itself, which correlates perfectly with itself -- confirm).
    selected = ranked_labels[1:5]

    expander = PolynomialFeatures(degree=power, include_bias=False)
    regressor = make_pipeline(expander, LinearRegression())
    regressor.fit(X_train.loc[:, selected], y_train)
    return regressor
def ridge(alpha=0.01):
    """Fit a Ridge regression on the house-prices data and score it.

    Args:
        alpha: Value handed to ``Ridge`` as ``alpha``.

    Returns:
        A ``(rmse_train, rmse_test, model)`` tuple: root-mean-squared error
        on the training and test partitions, followed by the fitted model.
    """
    _, train_X, test_X, train_y, test_y = load_data(
        'data/house_prices_multivariate.csv', random_state=9)

    # NOTE(review): the ``normalize`` keyword was removed from scikit-learn's
    # linear models in newer releases -- confirm the pinned version.
    estimator = Ridge(alpha=alpha, normalize=True)
    fitted = estimator.fit(train_X, train_y)

    predictions_test = fitted.predict(test_X)
    predictions_train = fitted.predict(train_X)

    train_mse = mean_squared_error(train_y, predictions_train)
    test_mse = mean_squared_error(test_y, predictions_test)
    return np.sqrt(train_mse), np.sqrt(test_mse), fitted
def lasso(alpha=0.01):
    """Fit a Lasso regression on the house-prices data and score it.

    Side effect: reseeds NumPy's global random generator with 9.

    Args:
        alpha: Value handed to ``Lasso`` as ``alpha``.

    Returns:
        A ``(rmse_train, rmse_test)`` tuple of Python floats.
    """
    _, train_X, test_X, train_y, test_y = load_data(
        'data/house_prices_multivariate.csv')
    np.random.seed(9)

    # NOTE(review): the ``normalize`` keyword was removed from scikit-learn's
    # linear models in newer releases -- confirm the pinned version.
    regressor = Lasso(alpha=alpha, normalize=True, random_state=9)
    regressor.fit(train_X, train_y)

    fitted_train = regressor.predict(train_X)
    fitted_test = regressor.predict(test_X)

    # Predictions are passed first and the targets second, as in the
    # original call order.
    train_rmse = np.sqrt(mean_squared_error(fitted_train, train_y))
    test_rmse = np.sqrt(mean_squared_error(fitted_test, test_y))
    return float(train_rmse), float(test_rmse)
def polynomial(power=5, random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        random_state: Forwarded to ``load_data`` as ``random_state``.

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline.
    """
    _, train_X, _, train_y, _ = load_data(
        'data/house_prices_multivariate.csv', random_state=random_state)

    # Only these four training columns feed the model.
    chosen = train_X[['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']]

    expander = PolynomialFeatures(degree=power, include_bias=False)
    regressor = make_pipeline(expander, LinearRegression())
    regressor.fit(chosen, train_y)
    return regressor
def lasso(alpha=0.01):
    """Fit a Lasso regression on the house-prices data and score it.

    Args:
        alpha: Value handed to ``Lasso`` as ``alpha``.

    Returns:
        A ``(rmse_train, rmse_test)`` tuple: root-mean-squared error on the
        training and test partitions.
    """
    _, train_X, test_X, train_y, test_y = load_data(
        'data/house_prices_multivariate.csv', random_state=9)

    # NOTE(review): the ``normalize`` keyword was removed from scikit-learn's
    # linear models in newer releases -- confirm the pinned version.
    regressor = Lasso(alpha=alpha, normalize=True)
    regressor.fit(train_X, train_y)

    fitted_train = regressor.predict(train_X)
    fitted_test = regressor.predict(test_X)

    train_mse = mean_squared_error(train_y, fitted_train)
    test_mse = mean_squared_error(test_y, fitted_test)
    return np.sqrt(train_mse), np.sqrt(test_mse)
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Loads the house-prices data via ``load_data``, keeps the columns
    ``OverallQual``, ``GrLivArea``, ``GarageCars`` and ``GarageArea`` of the
    training features and fits a ``PolynomialFeatures`` ->
    ``LinearRegression`` pipeline against the training target.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        Random_state: Forwarded to ``load_data`` as ``random_state``
            (presumably the seed of its train/test split -- confirm against
            ``load_data``). It used to be accepted but never used.

    Returns:
        The fitted pipeline.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=Random_state)

    X = X_train.loc[
        :, ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']]

    higher_polynomial = make_pipeline(
        PolynomialFeatures(power, include_bias=False), LinearRegression())
    Model_0 = higher_polynomial.fit(X, y_train)
    return Model_0
def test_Max_important_feature(self):
    # Verifies the signature of Max_important_feature (three arguments,
    # defaults equal to ``(4, )``) and the labels it returns for the
    # house-prices data.
    # NOTE(review): ``getargspec`` and ``assertItemsEqual`` look like
    # Python 2-era APIs -- confirm the interpreter this suite targets.

    # Input parameters tests
    spec = getargspec(Max_important_feature)
    given_count = len(spec[0])
    self.assertEqual(
        given_count, 3,
        "Expected argument(s) %d, Given %d" % (3, given_count))
    self.assertEqual(
        spec[3], (4, ),
        "Expected default values do not match given default values")

    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv')
    arr = Max_important_feature(data_set, "SalePrice", 4)

    # Return value tests
    wanted = np.array(
        ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'],
        dtype=object)
    self.assertItemsEqual(arr, wanted)
def test_Max_important_feature(self):
    # Verifies the signature of Max_important_feature (three arguments,
    # defaults ``('SalePrice', 4)``) and the labels it returns, in order,
    # when called with those defaults on the house-prices data.

    # Input parameters tests
    args = getargspec(Max_important_feature)
    # Report the number of declared arguments (args[0]) in the message; it
    # previously reported len(args), the length of the spec tuple itself.
    self.assertEqual(
        len(args[0]), 3,
        "Expected argument(s) %d, Given %d" % (3, len(args[0])))
    self.assertEqual(
        args[3], ('SalePrice', 4),
        "Expected default values do not match given default values")

    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv')
    arr = Max_important_feature(data_set)
    expected_list = [
        'OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'
    ]

    # Return value tests
    # The message is a single literal: the former two adjacent literals
    # concatenated without a space ("...the givenoutput").
    self.assertListEqual(
        list(arr), expected_list,
        "Expected output does not match the given output")
# Default imports
from sklearn.model_selection import cross_val_score
import numpy as np
from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data

# Seed NumPy's global random generator at import time.
np.random.seed(9)

# We have already loaded the data for you
data_set, X_train, X_test, y_train, y_test = load_data(
    'data/house_prices_multivariate.csv')


def cross_validation(model, X, y):
    """Return the mean cross-validation score of ``model`` on ``(X, y)``.

    Scores come from ``cross_val_score`` with ``cv=5`` and the
    ``"neg_mean_squared_error"`` scoring string; their mean is returned.
    """
    fold_scores = cross_val_score(
        model, X, y, cv=5, scoring="neg_mean_squared_error")
    return fold_scores.mean()


# Write your solution here