Exemplo n.º 1
0
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor
from sklearn.linear_model import Ridge
import numpy as np
import pandas as pd

from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor
# Seed NumPy's global random generator so later NumPy-based randomness is repeatable.
np.random.seed(9)

# Load the dataset from the CSV file.
# NOTE(review): load_data and split_dataset are not imported in the visible
# part of this snippet -- confirm they are brought into scope elsewhere.
df = load_data('data/student-mat.csv')
 
# Split the loaded frame into train/test features and targets.
x_train, x_test, y_train, y_test =  split_dataset(df)

# Replace both feature sets with the output of label_encode
# (presumably categorical columns become integer codes -- verify in q03).
x_train,x_test = label_encode(x_train,x_test)

# Write your code below
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Fit a ridge regression and gather its evaluation metrics.

    Parameters
    ----------
    x_train, y_train
        Training features and target the model is fitted on.
    x_test, y_test
        Held-out features and target passed to ``regression_predictor``.
    alpha : float, default 0.1
        Forwarded to ``Ridge(alpha=...)``.

    Returns
    -------
    tuple
        ``(model, y_pred, scores)``: the fitted ``Ridge`` instance, the
        predictions returned by ``regression_predictor``, and a DataFrame
        with the columns ``cross_val``, ``mae``, ``r2`` and ``mse``.
        The ``mse`` column stores the square root of the ``mse`` value
        reported by ``regression_predictor``.
    """
    model = Ridge(alpha=alpha, normalize=True)
    model.fit(x_train, y_train)

    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    cv_result = cross_validation_regressor(model, x_train, y_train)

    # Each metric is wrapped in a Series and assigned as its own column,
    # one after another, in exactly this order.
    metric_columns = (
        ('cross_val', cv_result),
        ('mae', mae),
        ('r2', r2),
        ('mse', mse ** 0.5),
    )
    scores = pd.DataFrame()
    for column_name, metric in metric_columns:
        scores[column_name] = pd.Series(metric)

    return model, y_pred, scores
# Run the ridge pipeline once on the prepared split with alpha=0.1;
# the returned (model, predictions, scores) tuple is not stored.
ridge(x_train,x_test,y_train,y_test,0.1)
Exemplo n.º 2
0
def ohe_encode(X_train, X_test, defaults=category_index):
    """Encode the train and test feature sets through ``label_encode``.

    Only ``label_encode`` is applied in this body; no further encoding
    step is performed, and ``defaults`` is accepted but never read.

    Returns
    -------
    tuple
        The two values produced by ``label_encode(X_train, X_test)``,
        in the same order.
    """
    encoded_pair = label_encode(X_train, X_test)
    train_encoded, test_encoded = encoded_pair
    return train_encoded, test_encoded
# %load q04_data_visualisation/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
# %matplotlib inline

# Load the dataset from the CSV file.
data = load_data('data/student-mat.csv') 
# Split the loaded frame into train/test features and targets.
X_train, X_test, y_train, y_test =  split_dataset(data)
# Replace both feature sets with the output of label_encode
# (presumably categorical columns become integer codes -- verify in q03).
X_train,X_test = label_encode(X_train,X_test)

# Write your code below
def visualise_data(data, figname):
    """Draw a scatter-matrix of ``data`` and display it.

    Parameters
    ----------
    data
        Passed straight to ``pandas.plotting.scatter_matrix``.
    figname
        Accepted for interface compatibility; not used by this body.
    """
    plot_options = {
        'alpha': 0.2,
        'figsize': (15, 15),
        'diagonal': 'kde',
    }
    scatter_matrix(data, **plot_options)
    plt.show()
    
# visualise_data(data,'figname')


def ohe_encode(x_train,x_test,category_index=category_index):
    """One-hot encode the train and test feature sets consistently.

    Both sets are first passed through ``label_encode``; a single
    ``OneHotEncoder`` is then fitted on the training features only and
    reused to transform the test features.

    Parameters
    ----------
    x_train, x_test
        Training and test features.
    category_index
        Forwarded to ``OneHotEncoder(categorical_features=...)``; defaults
        to the module-level ``category_index``.

    Returns
    -------
    tuple
        ``(X_transform, X_test_transform)``: the encoded training and test
        features, produced with ``sparse=False``.
    """
    x_train, x_test = label_encode(x_train, x_test)
    ohe = OneHotEncoder(categorical_features=category_index, sparse=False)
    X_transform = ohe.fit_transform(x_train)
    # Reuse the encoder fitted on the training data. Re-fitting on the test
    # set (the previous behaviour) could yield a different set and order of
    # output columns whenever the test data contains different categories.
    X_test_transform = ohe.transform(x_test)
    return X_transform, X_test_transform
# %load q12_feature_selection/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

from greyatomlib.multivariate_regression_project.q11_feature_selection_q02_best_k_features.build import percentile_k_features

from greyatomlib.multivariate_regression_project.q11_feature_selection_q01_plot_corr.build import plot_corr
from greyatomlib.multivariate_regression_project.q12_feature_selection.build import feature_selection

import pandas as pd
# Load the dataset from the CSV file.
df = load_data('data/student-mat.csv')
# X: every column except the last one (dropped by its label); y: the last column.
X = df.drop(df.columns[len(df.columns) - 1], axis=1)
y = df.iloc[:, -1]
# Of this split, only x_train is used again in the visible code.
x_train, x_test, y_train, y_test = split_dataset(df)
# Encode the full feature frame X; the second result of label_encode is discarded.
# NOTE(review): x_train is passed where the other snippets pass a test set --
# confirm this is intentional.
X, _ = label_encode(X, x_train)


def pick_features(X, y, k=50):
    """Return the features chosen by ``percentile_k_features``.

    Parameters
    ----------
    X, y
        Features and target, forwarded unchanged.
    k : int, default 50
        Forwarded as the third argument (presumably a percentile --
        confirm against ``percentile_k_features``).

    Returns
    -------
    Whatever ``percentile_k_features(X, y, k)`` returns, unmodified.
    """
    return percentile_k_features(X, y, k)