from unittest import TestCase
from ..build import visualise_data
from inspect import getargspec
import pandas as pd
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix

# Module-level fixtures: everything below runs once, at import time.
path = 'data/student-mat.csv'
df = load_data(path)
# Unpack the four values returned by split_dataset (train/test features and
# targets, going by the names).
x_train, x_test, y_train, y_test = split_dataset(df)
# The two values returned by label_encode are bound to capitalised names, so
# x_train / x_test keep referring to the objects that were passed in.
# NOTE(review): whether label_encode mutates its inputs is not visible here.
X_train, X_test = label_encode(x_train, x_test)


class TestLoad_data(TestCase):
    """Signature checks for ``visualise_data``."""

    def test_args(self):  # Input parameters tests
        """Assert that ``visualise_data`` declares exactly two positional parameters."""
        # inspect.getargspec was removed in Python 3.11 and rejected functions
        # with annotations or keyword-only parameters. getfullargspec exposes
        # the same list of positional parameter names through ``.args``.
        from inspect import getfullargspec

        arg_names = getfullargspec(visualise_data).args
        self.assertEqual(len(arg_names), 2,
                         "Expected arguments %d, Given %d" % (2, len(arg_names)))
# Example #2
# 0
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression
from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor

# Module-level pipeline: each step below runs once, at import time, and feeds
# the next one.
df = load_data('data/student-mat.csv')

# Unpack the four values returned by split_dataset.
x_train, x_test, y_train, y_test = split_dataset(df)

# Rebind x_train / x_test to whatever label_encode returns for them.
x_train, x_test = label_encode(x_train, x_test)

# Model built from the training split.
model = linear_regression(x_train, y_train)

# Result of cross_validation_regressor on the training split.
# NOTE(review): the shape/meaning of `val` depends on that helper - confirm.
val = cross_validation_regressor(model, x_train, y_train)


def regression_predictor(model, X, y):
    """Predict with ``model`` on ``X`` and score the predictions against ``y``.

    Args:
        model: Object exposing ``predict(X)``.
        X: Inputs handed to ``model.predict``.
        y: True target values the predictions are compared with.

    Returns:
        A 4-tuple ``(y_pred, mse, mae, r2)``: the predictions followed by the
        results of ``mean_squared_error``, ``mean_absolute_error`` and
        ``r2_score``, in that order.
    """
    predictions = model.predict(X)

    # Evaluate the metrics in the same order they appear in the result tuple.
    scores = []
    for metric in (mean_squared_error, mean_absolute_error, r2_score):
        scores.append(metric(y_true=y, y_pred=predictions))

    return (predictions,) + tuple(scores)
# %load q02_data_split/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from sklearn.model_selection import train_test_split
import pandas as pd
# NOTE: despite its name, `df` holds the CSV path string, not a DataFrame.
df = 'data/student-mat.csv'
# Load the dataset once, at import time, via load_data.
data_1 = load_data(df)

# Write your code below
def split_dataset(data):
    """Split a dataset into train and test features and target.

    Every column except the last is treated as a feature; the last column is
    the target.

    Args:
        data: The dataset to split, indexable with ``.iloc``. A ``str`` is
            treated as a dataset path and loaded with ``load_data`` first.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)`` produced by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=42``.
    """
    # Accept a path as well as loaded data, so callers that pass the CSV
    # location keep working.
    if isinstance(data, str):
        data = load_data(data)

    # Split the data the caller passed in, not module-level state, so the
    # function gives the right answer for any dataset.
    X = data.iloc[:, :-1]
    y = data.iloc[:, -1]
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)
    return x_train, x_test, y_train, y_test
    
# Runs the split once at import time; the returned tuple is discarded.
# NOTE(review): `df` is the CSV path string assigned above, not loaded data.
split_dataset(df)