def test_linear_regression_validation(self, get_regression_data):
    X, y = get_regression_data
    with pytest.raises(ValueError):
        est = LinearRegression(metric='accuracy')
        est.fit(X, y)
    with pytest.raises(ValueError):
        est = LinearRegression(cost='binary_cross_entropy')
        est.fit(X, y)
def fit_multiple_models(get_regression_data):
    X, y = get_regression_data
    X = X[:, 5]
    X = np.reshape(X, (-1, 1))
    bgd = LinearRegression(epochs=200, seed=50)
    sgd = LinearRegression(epochs=200, seed=50, batch_size=1)
    mgd = LinearRegression(epochs=200, seed=50, batch_size=32)
    models = {'Batch Gradient Descent': bgd.fit(X, y),
              'Stochastic Gradient Descent': sgd.fit(X, y),
              'Mini-batch Gradient Descent': mgd.fit(X, y)}
    return models
def test_residuals_leverage_plot(self, split_regression_data):
    X_train, X_test, y_train, y_test = split_regression_data
    model = LinearRegression(epochs=1000, metric='mape')
    v = ResidualsLeverage(model=model)
    v.fit(X_train, y_train)
    v.score(X_test, y_test)
    v.show()
def test_studentized_residual_plot(self, split_regression_data):
    X_train, X_test, y_train, y_test = split_regression_data
    model = LinearRegression(epochs=1000, metric='mae')
    v = StudentizedResiduals(model=model)
    v.fit(X_train, y_train)
    v.score(X_test, y_test)
    v.show()
def test_scale_location_plot(self, split_regression_data):
    X_train, X_test, y_train, y_test = split_regression_data
    model = LinearRegression(epochs=1000, metric='mae')
    v = ScaleLocation(model=model)
    v.fit(X_train, y_train)
    v.score(X_test, y_test)
    v.show()
def model_higher_is_better(request):
    model = LinearRegression(metric=request.param, early_stop=True,
                             val_size=0.3, precision=0.1, patience=2)
    model.cost_function = RegressionCostFactory()(cost='quadratic')
    model.scorer = RegressionMetricFactory()(metric=request.param)
    return model
def test_cooks_distance_plot(self, split_regression_data):
    X_train, X_test, y_train, y_test = split_regression_data
    model = LinearRegression(epochs=1000, metric='mape')
    v = CooksDistance(model=model)
    v.fit(X_train, y_train)
    v.score(X_test, y_test)
    v.show()
def get_classes():
    c = Classes()
    classes = [LinearRegression(), LassoRegression(), RidgeRegression(),
               ElasticNetRegression(), LogisticRegression(),
               MultinomialLogisticRegression()]
    for cls in classes:
        c.add_class(cls)
    return c
def predict_y():
    X, y = datasets.load_boston(return_X_y=True)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    gd = LinearRegression(epochs=5)
    gd.fit(X, y)
    y_pred = gd.predict(X)
    return y, y_pred
def test_early_stop_improvement_on_epoch_end_val_cost(self):
    stop = EarlyStopImprovement(monitor='val_cost', precision=0.1, patience=2)
    stop.model = LinearRegression(metric=None)
    stop.on_train_begin()
    logs = [{'val_cost': 100}, {'val_cost': 99}, {'val_cost': 80},
            {'val_cost': 78}, {'val_cost': 77}]
    converged = [False, False, False, False, True]
    for i in range(len(logs)):
        stop.on_epoch_end(epoch=i+1, logs=logs[i])
        assert stop.converged == converged[i], "not converging correctly"
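# Note (assumed behavior, not taken from the EarlyStopImprovement source): with
# precision=0.1 and patience=2, val_cost must improve by roughly 10% relative to
# the best value seen so far. Epochs 4 (80 -> 78) and 5 (-> 77) both fall short,
# exhausting the patience of 2, which is why convergence is expected only on the
# final epoch: [False, False, False, False, True].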
def test_early_stop_generalization_loss_on_epoch_end(self):
    stop = EarlyStopGeneralizationLoss()
    stop.model = LinearRegression()
    logs = [{'val_cost': 100, 'theta': np.random.rand(4)},
            {'val_cost': 101, 'theta': np.random.rand(4)},
            {'val_cost': 120, 'theta': np.random.rand(4)}]
    converged = [False, False, True]
    for i in range(len(logs)):
        stop.on_epoch_end(epoch=i+1, logs=logs[i])
        assert stop.converged == converged[i], "not converging correctly"
        assert isinstance(stop.best_weights, (np.ndarray, np.generic)), \
            "best_weights not np.array"
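# Note (assumed criterion): generalization loss is commonly defined as
# GL(t) = 100 * (val_cost_t / min_val_cost - 1). For these logs GL is 0%, 1%,
# then 20%, so only the jump to 120 should exceed the callback's default
# threshold, which is what the expected [False, False, True] sequence encodes.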
def test_inv_scaling_learning_rate_schedule(self, get_regression_data):
    exp_result = [0.1, 0.070710678, 0.057735027, 0.05, 0.04472136]
    act_result = []
    lrs = InverseScaling(learning_rate=0.1, power=0.5)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Inverse scaling not working"
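# The expected values are consistent with an inverse-scaling rule of the assumed
# form eta_t = learning_rate / t**power:
# 0.1/sqrt(1) = 0.1, 0.1/sqrt(2) ~ 0.0707, 0.1/sqrt(3) ~ 0.0577,
# 0.1/sqrt(4) = 0.05, 0.1/sqrt(5) ~ 0.0447.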
def test_productivity_curve(self, get_generated_medium_regression_data):
    X_train, y_train = get_generated_medium_regression_data
    model = LinearRegression(epochs=500, batch_size=32, metric='r2',
                             verbose=False, val_size=0, early_stop=False)
    sizes = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
    cv = 5
    est = ProductivityCurve(model=model, sizes=sizes, cv=cv)
    est.fit(X_train, y_train)
def test_kfold_cv(self, get_generated_medium_regression_data):
    X_train, y_train = get_generated_medium_regression_data
    model = LinearRegression(epochs=500, batch_size=32, metric='r2',
                             verbose=False, val_size=0, early_stop=False)
    sizes = np.arange(start=100, stop=1100, step=100, dtype=np.int32)
    k = 5
    est = KFoldCV(model=model, sizes=sizes, k=k)
    est.fit(X_train, y_train)
def test_early_stop_improvement_validation(self):
    with pytest.raises(ValueError):
        stop = EarlyStopImprovement(monitor=9)
        stop.model = LinearRegression(metric=None)
        stop.on_train_begin()
    with pytest.raises(ValueError):
        stop = EarlyStopImprovement(monitor='x')
        stop.model = LinearRegression(metric=None)
        stop.on_train_begin()
    with pytest.raises(TypeError):
        stop = EarlyStopImprovement(precision='x')
        stop.model = LinearRegression(metric=None)
        stop.on_train_begin()
    with pytest.raises(TypeError):
        stop = EarlyStopImprovement(precision=5)
        stop.model = LinearRegression(metric=None)
        stop.on_train_begin()
    with pytest.raises(TypeError):
        stop = EarlyStopImprovement(patience='x')
        stop.model = LinearRegression(metric=None)
        stop.on_train_begin()
    with pytest.raises(ValueError):
        stop = EarlyStopImprovement(monitor='val_score')
        stop.model = LinearRegression(metric=None)
        stop.on_train_begin()
def test_step_decay_learning_rate_schedule(self, get_regression_data):
    exp_result = [0.1000000000, 0.1000000000, 0.1000000000,
                  0.0500000000, 0.0500000000]
    act_result = []
    lrs = StepDecay(learning_rate=0.1, decay_rate=0.5, decay_steps=5)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Step decay not working"
def test_polynomial_decay_learning_rate_schedule_wo_cycle(
        self, get_regression_data):
    exp_result = [0.0895, 0.0775, 0.0633, 0.0448, 0.0001]
    act_result = []
    lrs = PolynomialDecay(learning_rate=0.1, decay_steps=5, power=0.5,
                          end_learning_rate=0.0001)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Polynomial decay not working"
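# The expected values match a polynomial decay of the assumed form
# eta_t = (learning_rate - end_learning_rate) * (1 - t/decay_steps)**power
#         + end_learning_rate,
# e.g. t=1: 0.0999 * 0.8**0.5 + 0.0001 ~ 0.0895, and t=5 bottoms out at
# end_learning_rate = 0.0001.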
def test_time_decay_learning_rate_schedule_wo_staircase(
        self, get_regression_data):
    exp_result = [0.0909090909, 0.0833333333, 0.0769230769,
                  0.0714285714, 0.0666666667]
    act_result = []
    lrs = TimeDecay(learning_rate=0.1, decay_rate=0.5, decay_steps=5)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Time decay not working"
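# The expected values match a time-based decay of the assumed form
# eta_t = learning_rate / (1 + decay_rate * t / decay_steps),
# e.g. t=1: 0.1 / (1 + 0.5 * 1/5) = 0.1 / 1.1 ~ 0.0909.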
def test_exp_decay_learning_rate_schedule_w_staircase(
        self, get_regression_data):
    exp_result = [0.1, 0.1, 0.1, 0.1, 0.05]
    act_result = []
    lrs = ExponentialDecay(learning_rate=0.1, decay_rate=0.5, decay_steps=5,
                           staircase=True)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Exponential decay with steps and staircase not working"
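# With staircase=True the exponent is assumed to be floored, i.e.
# eta_t = learning_rate * decay_rate**floor(t / decay_steps), so the rate
# holds at 0.1 for t=1..4 and drops to 0.05 at t=5.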
def test_exp_decay_learning_rate_schedule_wo_staircase(
        self, get_regression_data):
    exp_result = [0.0870550563, 0.0757858283, 0.0659753955,
                  0.0574349177, 0.0500000000]
    act_result = []
    lrs = ExponentialDecay(learning_rate=0.1, decay_rate=0.5, decay_steps=5)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Exponential decay not working"
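# Without staircasing the decay is assumed to be continuous:
# eta_t = learning_rate * decay_rate**(t / decay_steps),
# e.g. t=1: 0.1 * 0.5**0.2 ~ 0.0871 and t=5: 0.1 * 0.5 = 0.05.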
def test_nat_exp_decay_learning_rate_schedule_wo_staircase(
        self, get_regression_data):
    exp_result = [0.0904837418, 0.0818730753, 0.0740818221,
                  0.0670320046, 0.0606530660]
    act_result = []
    lrs = NaturalExponentialDecay(learning_rate=0.1, decay_rate=0.5,
                                  decay_steps=5)
    lrs.model = LinearRegression()
    iterations = [i + 1 for i in range(5)]
    for i in iterations:
        lrs.on_epoch_end(i)
        act_result.append(lrs.model.eta)
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Natural exponential decay not working"
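# The expected values match a natural exponential decay of the assumed form
# eta_t = learning_rate * exp(-decay_rate * t / decay_steps),
# e.g. t=1: 0.1 * exp(-0.1) ~ 0.0905 and t=5: 0.1 * exp(-0.5) ~ 0.0607.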
def test_early_stop_strips_on_epoch_end(self):
    # Obtain train and validation costs
    filename = "tests/test_operations/test_early_stop.xlsx"
    df = pd.read_excel(io=filename, sheet_name='strips_data')
    val_costs = df['val_cost']
    logs = []
    for i in range(len(val_costs)):
        log = {'val_cost': val_costs[i]}
        logs.append(log)
    # Instantiate and test early stop
    stop = EarlyStopStrips(patience=3)
    stop.model = LinearRegression()
    stop.on_train_begin()
    for i in range(len(val_costs)):
        stop.on_epoch_end(epoch=i+1, logs=logs[i])
        if i < len(val_costs) - 1:
            assert stop.converged == False, "not converging at the appropriate time"
        else:
            assert stop.converged == True, "not converging at the appropriate time"
def test_adaptive_learning_rate_schedule(self, get_regression_data):
    logs = {}
    exp_result = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                  0.05, 0.05, 0.05]
    act_result = []
    lrs = Adaptive(learning_rate=0.1, decay_rate=0.5, precision=0.01,
                   patience=5)
    lrs.model = LinearRegression()
    lrs.model.eta = 0.1
    logs['learning_rate'] = 0.1
    cost = [5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 3]
    iterations = [i + 1 for i in range(12)]
    for i in iterations:
        logs['train_cost'] = cost[i - 1]
        lrs.on_epoch_end(i, logs)
        act_result.append(lrs.model.eta)
        logs['learning_rate'] = lrs.model.eta
    assert all(np.isclose(exp_result, act_result, rtol=1e-1)), \
        "Adaptive decay with cycle not working"
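# Note (assumed behavior): Adaptive halves eta (decay_rate=0.5) once train_cost
# has failed to improve by more than precision=0.01 for patience=5 consecutive
# epochs. The cost plateaus at 4 from epoch 5, so the fifth stagnant epoch is
# epoch 10, where exp_result drops from 0.1 to 0.05.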
def test_training_curve(self, get_regression_data):
    X_train, y_train = get_regression_data
    model = LinearRegression(epochs=1000, metric='mape')
    v = TrainingCurve(model=model)
    v.fit(X_train, y_train)
def test_linear_regression_name(self, get_regression_data):
    X, y = get_regression_data
    est = LinearRegression()
    est.fit(X, y)
    assert est.name == "Linear Regression with Batch Gradient Descent", \
        "incorrect name"
def models_by_metric(request):
    model = LinearRegression(metric=request.param)
    model.cost_function = RegressionCostFactory()(cost='quadratic')
    model.scorer = RegressionMetricFactory()(metric=request.param)
    return model
def test_qq_plot_plot(self, split_regression_data):
    X_train, _, y_train, _ = split_regression_data
    model = LinearRegression(epochs=1000, metric='mae')
    v = QQPlot(model=model)
    v.fit(X_train, y_train)
    v.show()
# Linear Regression Scatterplot
data = go.Scatter(x=X[:, 0], y=y, mode='markers',
                  marker=dict(color='steelblue'))
layout = go.Layout(title='Boston Housing Prices by Rooms',
                   height=400, width=800, showlegend=False,
                   xaxis_title="Average No. Rooms",
                   yaxis_title="Median Price ($000)",
                   margin=dict(l=10, r=10, t=40, b=10),
                   template='plotly_white')
fig = go.Figure(data=data, layout=layout)
fig.show()
po.plot(fig, filename="./content/figures/boston.html", auto_open=False)

# ---------------------------------------------------------------------------- #
#                               LINEAR REGRESSION                              #
# ---------------------------------------------------------------------------- #
#%%
# Linear Regression
lr = LinearRegression(epochs=50, learning_rate=0.05)
lr.fit(X_scaled, y)
plot = SingleModelSearch3D()
plot.search(lr, directory=directory, filename="linear_regression_search.gif")
plot = SingleModelFit2D()
plot.fit(lr, directory=directory, filename="linear_regression_fit.gif")
#%%
fig.show()
po.plot(fig, filename="./content/figures/simulated_training_data.html",
        auto_open=False)

# ---------------------------------------------------------------------------- #
#                               LINEAR REGRESSION                              #
# ---------------------------------------------------------------------------- #
#%%
# Linear Regression
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
lr = LinearRegression(epochs=1000, learning_rate=0.01, val_size=0.2,
                      patience=40, early_stop=True, metric='r2',
                      verbose=True, checkpoint=100)
lr.fit(X_train, y_train)
print(lr.intercept_)
print(lr.coef_.shape)

# ---------------------------------------------------------------------------- #
#                                  ANIMATIONS                                  #
# ---------------------------------------------------------------------------- #
#%%
# Animations
plot = SingleModelSearch3D()
plot.search(lr, directory=directory,
            filename="linear_regression_search_test.gif")