"""Tests for the ``visualise_data`` build function."""
from unittest import TestCase
from ..build import visualise_data

try:
    # ``inspect.getargspec`` was removed in Python 3.11. ``getfullargspec``
    # returns a named tuple whose leading fields (args, varargs, varkw,
    # defaults) line up with the old result, so it is bound to the old name.
    from inspect import getfullargspec as getargspec
except ImportError:  # interpreters that predate ``getfullargspec``
    from inspect import getargspec

import pandas as pd
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix

# Module-level fixtures: load the dataset, split it, then label-encode the
# feature partitions.
path = 'data/student-mat.csv'
df = load_data(path)
x_train, x_test, y_train, y_test = split_dataset(df)
X_train, X_test = label_encode(x_train, x_test)


class TestLoad_data(TestCase):
    """Signature checks for ``visualise_data``."""

    def test_args(self):
        """``visualise_data`` must declare exactly two named arguments."""
        # Input parameters tests
        args = getargspec(visualise_data)
        self.assertEqual(len(args[0]), 2, "Expected arguments %d, Given %d" % (2, len(args[0])))
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression
from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor

# Module-level pipeline: load -> split -> encode -> fit -> cross-validate.
df = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(df)
x_train, x_test = label_encode(x_train, x_test)
model = linear_regression(x_train, y_train)
val = cross_validation_regressor(model, x_train, y_train)


def regression_predictor(model, X, y):
    """Predict with ``model`` on ``X`` and score the predictions against ``y``.

    Args:
        model: Object exposing a ``predict(X)`` method.
        X: Feature data passed straight to ``model.predict``.
        y: True target values used as the reference for every metric.

    Returns:
        A 4-tuple ``(predictions, mse, mae, r2)`` holding the model's
        predictions followed by the mean squared error, mean absolute error
        and R^2 score of those predictions.
    """
    predictions = model.predict(X)
    # Every metric shares the same (y_true, y_pred) keywords, so evaluate
    # them in a single pass, in the order they are returned.
    mse, mae, r2 = (
        metric(y_true=y, y_pred=predictions)
        for metric in (mean_squared_error, mean_absolute_error, r2_score)
    )
    return predictions, mse, mae, r2
# %load q02_data_split/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from sklearn.model_selection import train_test_split
import pandas as pd

# NOTE: despite its name, ``df`` holds the CSV *path*; ``data_1`` is the
# object returned by ``load_data`` for that path.
df = 'data/student-mat.csv'
data_1 = load_data(df)


# Write your code below
def split_dataset(data):
    """Split ``data`` into train and test partitions of features and target.

    The last column is treated as the target and all preceding columns as
    features. The split holds out 20% of the rows for testing and is seeded
    (``random_state=42``).

    Args:
        data: Either a table supporting ``.iloc`` indexing (presumably the
            pandas DataFrame returned by ``load_data`` -- confirm against
            that helper), or a path string, which is loaded via
            ``load_data`` first.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.
    """
    # The function used to ignore ``data`` and always split the module-level
    # ``data_1``. Path strings are still accepted so existing calls such as
    # ``split_dataset('data/student-mat.csv')`` keep working.
    if isinstance(data, str):
        data = load_data(data)

    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return x_train, x_test, y_train, y_test


# Retained from the original module; the result is discarded. The already
# loaded frame is passed so the CSV is not read a second time.
split_dataset(data_1)