def polynomial(power=5, random_state=9):
    """Fit a polynomial regression on the top correlated features.

    The function reads the module-level ``data_set`` (it is not passed in),
    selects the four columns ranked highest by correlation with
    ``'SalePrice'``, splits the rows into train and test parts and fits a
    ``PolynomialFeatures`` -> ``LinearRegression`` pipeline on the train part.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        random_state: Seed handed to ``train_test_split``.

    Returns:
        The fitted pipeline.
    """
    # Select the numerical features with the highest correlation to the target.
    def Max_important_feature(data_set, target_variable='SalePrice', n=4):
        # Request n + 1 rows and skip the first one: the top row is expected
        # to be the target itself (correlation with itself).
        col = data_set.corr().nlargest(
            n=(n + 1), columns=target_variable)[target_variable].index[1:]
        return list(col)

    def load_data(df, test_size=0.33, random_state=random_state):
        # Every column except the last is a feature; the target was placed
        # last when ``df`` was assembled below.
        X = df.iloc[:, :-1]
        y = df['SalePrice']
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, random_state=random_state, test_size=test_size)
        return df, X_train, X_test, y_train, y_test

    col = Max_important_feature(data_set, target_variable='SalePrice', n=4)

    # Build the working frame in a single selection. Assigning a new column
    # into ``data_set[col]`` afterwards triggers SettingWithCopyWarning.
    df = data_set[col + ['SalePrice']]
    df, X_train, X_test, y_train, y_test = load_data(df,
                                                     test_size=0.33,
                                                     random_state=random_state)

    pipeline = make_pipeline(
        PolynomialFeatures(degree=power, include_bias=False),
        LinearRegression())
    model = pipeline.fit(X_train, y_train)
    return model
 def test_Max_important_feature_result_values(self):
     """Check the column names returned by ``Max_important_feature``.

     Loads the house-prices data, calls ``Max_important_feature`` with its
     default arguments and compares the result, as a list, with the four
     expected column names in order.
     """
     data_set, X_train, X_test, y_train, y_test = load_data('data/house_prices_multivariate.csv')
     arr = Max_important_feature(data_set)
     expected_list = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']
     # Return value tests
     # The message previously concatenated to "...the givenoutput"; the
     # separating space is restored here.
     self.assertListEqual(list(arr), expected_list,
                          "Expected output does not match the given output")
示例#3
0
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        Random_state: Seed forwarded to ``load_data`` for the train/test
            split (previously accepted but never used).

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline,
        trained on the training split.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=Random_state)
    features = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']
    X = X_train.loc[:, features]
    higher_polynomial = make_pipeline(
        PolynomialFeatures(power, include_bias=False), LinearRegression())
    Model_0 = higher_polynomial.fit(X, y_train)
    return Model_0
def polynomial(power=5, random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Args:
        power: Degree handed to ``PolynomialFeatures`` (previously ignored in
            favour of a hard-coded 5).
        random_state: Seed forwarded to ``load_data`` for the train/test
            split (previously accepted but never used).

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline.
        The target is supplied as a single-column 2-D array.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=random_state)
    cols = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']
    poly_model = make_pipeline(
        PolynomialFeatures(degree=power, include_bias=False),
        LinearRegression())
    # Convert through NumPy before reshaping: a pandas Series does not offer
    # ``reshape`` in current pandas versions, an ndarray always does.
    target = np.asarray(y_train).reshape(-1, 1)
    poly_model.fit(X_train[cols], target)
    return poly_model
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on the columns ranked 2nd to 5th by
    correlation with ``'SalePrice'``.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        Random_state: Seed forwarded to ``load_data``.

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=Random_state)

    # Column labels ordered from the highest to the lowest correlation with
    # the target column.
    target_corr = data_set.corr()['SalePrice']
    ranked_labels = np.array(target_corr.sort_values(ascending=False).index)
    # Position 0 is skipped (presumably the target itself); keep the next 4.
    chosen = ranked_labels[1:5]

    expander = PolynomialFeatures(degree=power, include_bias=False)
    pipeline = make_pipeline(expander, LinearRegression())
    pipeline.fit(X_train.loc[:, chosen], y_train)
    return pipeline
def ridge(alpha=0.01):
    """Fit a ridge regression and report its train and test RMSE.

    Args:
        alpha: Regularisation strength handed to ``Ridge``.

    Returns:
        A tuple ``(rmse_train, rmse_test, fitted_model)``.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=9)

    fitted = Ridge(alpha=alpha, normalize=True).fit(X_train, y_train)

    test_predictions = fitted.predict(X_test)
    train_predictions = fitted.predict(X_train)

    train_error = np.sqrt(mean_squared_error(y_train, train_predictions))
    test_error = np.sqrt(mean_squared_error(y_test, test_predictions))
    return train_error, test_error, fitted
def lasso(alpha=0.01):
    """Fit a lasso regression and report its train and test RMSE.

    Also seeds NumPy's global random number generator with 9 before the
    estimator is created.

    Args:
        alpha: Regularisation strength handed to ``Lasso``.

    Returns:
        A tuple ``(rmse_train, rmse_test)`` of Python floats.
    """
    splits = load_data('data/house_prices_multivariate.csv')
    data_set, X_train, X_test, y_train, y_test = splits

    np.random.seed(9)
    estimator = Lasso(alpha=alpha, normalize=True, random_state=9)
    estimator.fit(X_train, y_train)

    train_predictions = estimator.predict(X_train)
    test_predictions = estimator.predict(X_test)

    train_rmse = float(np.sqrt(mean_squared_error(train_predictions, y_train)))
    test_rmse = float(np.sqrt(mean_squared_error(test_predictions, y_test)))
    return train_rmse, test_rmse
示例#8
0
def polynomial(power=5, random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        random_state: Seed forwarded to ``load_data``.

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=random_state)

    selected = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']
    train_features = X_train[selected]

    expander = PolynomialFeatures(degree=power, include_bias=False)
    pipeline = make_pipeline(expander, LinearRegression())
    pipeline.fit(train_features, y_train)
    return pipeline
示例#9
0
def lasso(alpha=0.01):
    """Fit a lasso regression and report its train and test RMSE.

    Args:
        alpha: Regularisation strength handed to ``Lasso``.

    Returns:
        A tuple ``(rmse_train, rmse_test)``.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=9)

    estimator = Lasso(alpha=alpha, normalize=True)
    estimator.fit(X_train, y_train)

    train_predictions = estimator.predict(X_train)
    test_predictions = estimator.predict(X_test)

    train_mse = mean_squared_error(y_train, train_predictions)
    test_mse = mean_squared_error(y_test, test_predictions)
    return np.sqrt(train_mse), np.sqrt(test_mse)
示例#10
0
def polynomial(power=5, Random_state=9):
    """Fit a polynomial regression on four fixed house-price features.

    Args:
        power: Degree handed to ``PolynomialFeatures``.
        Random_state: Seed forwarded to ``load_data`` for the train/test
            split (previously accepted but never used).

    Returns:
        The fitted ``PolynomialFeatures`` -> ``LinearRegression`` pipeline,
        trained on the training split.
    """
    data_set, X_train, X_test, y_train, y_test = load_data(
        'data/house_prices_multivariate.csv', random_state=Random_state)
    X = X_train.loc[:,
                    ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea']]
    higher_polynomial = make_pipeline(
        PolynomialFeatures(power, include_bias=False), LinearRegression())
    Model_0 = higher_polynomial.fit(X, y_train)
    return Model_0
    def test_Max_important_feature(self):
        """Check the signature and result of ``Max_important_feature``.

        The function must take three arguments with ``(4, )`` as its tuple
        of defaults, and must return the four expected column names (in any
        order) for the house-prices data.
        """
        # Input parameters tests
        spec = getargspec(Max_important_feature)
        arg_names = spec[0]
        default_values = spec[3]
        arg_count = len(arg_names)
        self.assertEqual(
            arg_count, 3,
            "Expected argument(s) %d, Given %d" % (3, arg_count))
        self.assertEqual(
            default_values, (4, ),
            "Expected default values do not match given default values")

        data_set, X_train, X_test, y_train, y_test = load_data(
            'data/house_prices_multivariate.csv')
        top_features = Max_important_feature(data_set, "SalePrice", 4)

        # Return value tests
        expected = np.array(
            ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'],
            dtype=object)
        self.assertItemsEqual(top_features, expected)
    def test_Max_important_feature(self):
        """Check the signature and result of ``Max_important_feature``.

        The function must take three arguments with ``('SalePrice', 4)`` as
        its defaults, and with default arguments must return the four
        expected column names in order for the house-prices data.
        """
        # Input parameters tests
        args = getargspec(Max_important_feature)
        # Report the number of declared arguments (args[0]), not the length
        # of the spec tuple itself, so the failure message is accurate.
        self.assertEqual(
            len(args[0]), 3,
            "Expected argument(s) %d, Given %d" % (3, len(args[0])))
        self.assertEqual(
            args[3], ('SalePrice', 4),
            "Expected default values do not match given default values")

        data_set, X_train, X_test, y_train, y_test = load_data(
            'data/house_prices_multivariate.csv')
        arr = Max_important_feature(data_set)
        expected_list = [
            'OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'
        ]
        # Return value tests
        # The message previously concatenated to "...the givenoutput"; the
        # separating space is restored here.
        self.assertListEqual(
            list(arr), expected_list,
            "Expected output does not match the given output")
示例#13
0
# Default imports
from sklearn.model_selection import cross_val_score
import numpy as np
from greyatomlib.advanced_linear_regression.q01_load_data.build import load_data

# Seed NumPy's global random number generator at import time.
np.random.seed(9)
# We have already loaded the data for you
# The five values returned by load_data are bound as module-level names;
# presumably the full data set followed by a train/test split of it --
# confirm against load_data.
data_set, X_train, X_test, y_train, y_test = load_data(
    'data/house_prices_multivariate.csv')


def cross_validation(model, X, y):
    """Return the mean 5-fold cross-validation score of ``model``.

    Scoring is ``"neg_mean_squared_error"``, so the result is the mean of
    the per-fold negated mean squared errors.

    Args:
        model: Estimator handed to ``cross_val_score``.
        X: Feature data handed to ``cross_val_score``.
        y: Target data handed to ``cross_val_score``.

    Returns:
        The mean of the per-fold scores.
    """
    fold_scores = cross_val_score(
        model, X, y, scoring="neg_mean_squared_error", cv=5)
    mean_score = fold_scores.mean()
    return mean_score


# Write your solution here