from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor
from sklearn.linear_model import Ridge
import numpy as np
import pandas as pd
from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor

np.random.seed(9)

# load_data / split_dataset are imported above so this module-level setup
# does not fail with NameError when the file is executed.
df = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(df)
x_train, x_test = label_encode(x_train, x_test)


# Write your code below
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Fit a Ridge regressor and collect its evaluation scores.

    Args:
        x_train: Training features passed to ``Ridge.fit``.
        x_test: Test features passed to ``regression_predictor``.
        y_train: Training targets.
        y_test: Test targets.
        alpha: Regularisation strength forwarded to ``Ridge``.

    Returns:
        A tuple ``(ridge_regressor, y_pred, scores)`` where ``scores`` is a
        DataFrame with the columns ``cross_val``, ``mae``, ``r2`` and ``mse``
        (in that order).
    """
    ridge_regressor = Ridge(alpha=alpha, normalize=True)
    ridge_regressor.fit(x_train, y_train)

    # regression_predictor's result is unpacked as (y_pred, mse, mae, r2).
    y_pred, mse, mae, r2 = regression_predictor(ridge_regressor, x_test, y_test)
    val = cross_validation_regressor(ridge_regressor, x_train, y_train)

    scores = pd.DataFrame()
    scores['cross_val'] = pd.Series(val)
    scores['mae'] = pd.Series(mae)
    scores['r2'] = pd.Series(r2)
    # NOTE: the 'mse' column stores the square root of `mse`; the key is
    # kept unchanged so existing readers of `scores` keep working.
    scores['mse'] = pd.Series(mse ** 0.5)

    return ridge_regressor, y_pred, scores


ridge(x_train, x_test, y_train, y_test, 0.1)
def ohe_encode(X_train, X_test, defaults=category_index):
    """Label-encode the train and test feature sets.

    Both inputs are forwarded to ``label_encode`` and its two results are
    returned as a tuple. ``defaults`` is accepted but not used by this
    implementation.
    """
    encoded_train, encoded_test = label_encode(X_train, X_test)
    return encoded_train, encoded_test
# %load q04_data_visualisation/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
# %matplotlib inline

data = load_data('data/student-mat.csv')
X_train, X_test, y_train, y_test = split_dataset(data)
X_train, X_test = label_encode(X_train, X_test)


# Write your code below
def visualise_data(data, figname):
    """Show a scatter-matrix plot of ``data``.

    The matrix is drawn on a 15x15 figure with point alpha 0.2 and KDE
    plots on the diagonal, then displayed with ``plt.show()``.
    ``figname`` is accepted but not used by this implementation.
    """
    plot_options = {'alpha': 0.2, 'figsize': (15, 15), 'diagonal': 'kde'}
    scatter_matrix(data, **plot_options)
    plt.show()

# Example call (left disabled): visualise_data(data, 'figname')
def ohe_encode(x_train, x_test, category_index=category_index):
    """Label-encode, then one-hot encode the train and test feature sets.

    Args:
        x_train: Training features.
        x_test: Test features.
        category_index: Passed to ``OneHotEncoder`` as
            ``categorical_features`` to select the columns to expand.

    Returns:
        A tuple ``(X_transform, X_test_transform)`` of dense encoded outputs
        (``sparse=False``).
    """
    x_train, x_test = label_encode(x_train, x_test)
    ohe = OneHotEncoder(categorical_features=category_index, sparse=False)

    # Fit on the training split only.
    X_transform = ohe.fit_transform(x_train)
    # Reuse the fitted encoder for the test split; re-fitting on x_test
    # could yield a different column layout than the training matrix.
    X_test_transform = ohe.transform(x_test)

    return X_transform, X_test_transform
# %load q12_feature_selection/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from greyatomlib.multivariate_regression_project.q11_feature_selection_q02_best_k_features.build import percentile_k_features
from greyatomlib.multivariate_regression_project.q11_feature_selection_q01_plot_corr.build import plot_corr
from greyatomlib.multivariate_regression_project.q12_feature_selection.build import feature_selection
import pandas as pd

df = load_data('data/student-mat.csv')

# Features are every column except the last one; the last column is the target.
X = df.drop(df.columns[-1], axis=1)
y = df.iloc[:, -1]

x_train, x_test, y_train, y_test = split_dataset(df)
X, _ = label_encode(X, x_train)


def pick_features(X, y, k=50):
    """Return the result of ``percentile_k_features(X, y, k)``.

    Args:
        X: Feature data forwarded to ``percentile_k_features``.
        y: Target data forwarded to ``percentile_k_features``.
        k: Third argument forwarded to ``percentile_k_features``
            (defaults to 50).
    """
    return percentile_k_features(X, y, k)