def test_feature_scaling():
    """Scaling must leave the shape of the training matrix unchanged."""
    raw_matrix, raw_dep_vars = cleanup_data(import_data())

    # Encode both parts before splitting, in the same order as they are passed.
    encoded_matrix = encode_feature(raw_matrix)
    encoded_dep_vars = encode_feature(raw_dep_vars)
    x_train, x_test, _y_train, _y_test = training_set(
        encoded_matrix, encoded_dep_vars)

    # Only the scaled training matrix is inspected; the scaled test matrix
    # is produced by the call but not checked here.
    scaled_train, _scaled_test = feature_scaling(x_train, x_test)

    assert scaled_train.shape == x_train.shape
def test_training_set():
    """The split must yield the expected train/test shapes for each part."""
    columns = features_and_dependent_vars(import_data())
    x_train, x_test, y_train, y_test = training_set(*columns)

    # Matrix parts: 8 training rows and 2 test rows, 3 columns each.
    assert x_train.shape == (8, 3)
    assert x_test.shape == (2, 3)

    # Dependent-variable parts: same row counts, a single column.
    assert y_train.shape == (8, 1)
    assert y_test.shape == (2, 1)
def test_training_and_test_sets():
    """Pin the first value of each part produced by a one-third test split."""
    dataset = dp.import_data(DATA_FILE)
    columns = dp.features_and_dependent_vars(dataset)
    x_train, x_test, y_train, y_test = dp.training_set(
        *columns, test_size=1 / 3)

    # These exact values only hold if the split is reproducible between runs.
    # NOTE(review): presumably training_set fixes its random seed - confirm.
    assert x_train[0][0] == 2.9
    assert x_test[0][0] == 1.5
    assert y_train[0][0] == 56642
    assert y_test[0][0] == 37731
def test_predict():
    """Predictions for the training inputs must stay within the error bounds."""
    dataset = dp.import_data(DATA_FILE)
    columns = dp.features_and_dependent_vars(dataset)
    x_train, _x_test, y_train, _y_test = dp.training_set(
        *columns, test_size=1 / 3)

    model = train_the_machine(x_train, y_train)

    # NOTE(review): the model is evaluated on the same inputs it was trained
    # on; the held-out split is not used by this test - confirm intentional.
    predictions = predict(model, x_train)
    report, worst_error = error(y_train, predictions)

    assert worst_error < 0.2
    assert report['err'].mean() < 0.1
def test_train_the_machine():
    """Training must return a LinearRegression instance."""
    dataset = dp.import_data(DATA_FILE)
    columns = dp.features_and_dependent_vars(dataset)

    # Only the training parts of the split are needed here.
    x_train, _, y_train, _ = dp.training_set(*columns, test_size=1 / 3)

    model = train_the_machine(x_train, y_train)

    assert isinstance(model, LinearRegression)