def test_logistic_regression_learning_rate_schedules(self, learning_rate_schedules, get_binary_classification_data):
    """Fit with a learning-rate schedule and check the logged rate shrinks.

    The first logged learning rate must be larger than the last one, and
    must differ from the estimator's ``eta`` attribute after fitting.
    """
    features, labels = get_binary_classification_data
    model = LogisticRegression(epochs=200, checkpoint=10,
                               learning_rate=learning_rate_schedules,
                               patience=40)
    model.fit(features, labels)

    # Confirm learning rates decreased
    logged_rates = model.history.epoch_log.get('learning_rate')
    first_rate = logged_rates[0]
    last_rate = logged_rates[-1]
    assert first_rate > last_rate, "Learning rate didn't decrease"
    assert first_rate != model.eta, "Learning rate didn't change"
def test_logistic_regression_validation(self, get_binary_classification_data):
    """Expect fit() to raise ValueError for metric='mean' and cost='quadratic'."""
    features, labels = get_binary_classification_data
    invalid_settings = ({'metric': 'mean'}, {'cost': 'quadratic'})
    for settings in invalid_settings:
        # Construction stays outside the raises-context: only fit() is
        # expected to raise.
        model = LogisticRegression(epochs=50, **settings)
        with pytest.raises(ValueError):
            model.fit(features, labels)
def get_classes():
    """Return a ``Classes`` container filled with default-constructed estimators.

    One instance each of the linear, lasso, ridge, elastic-net, logistic and
    multinomial-logistic regression estimators is registered via
    ``add_class``, in that order.
    """
    registry = Classes()
    estimators = (
        LinearRegression(),
        LassoRegression(),
        RidgeRegression(),
        ElasticNetRegression(),
        LogisticRegression(),
        MultinomialLogisticRegression(),
    )
    for estimator in estimators:
        registry.add_class(estimator)
    return registry
def test_logistic_regression_predict(self, get_binary_classification_data):
    """Check prediction shapes and that the score lies strictly in (0.3, 1)."""
    features, labels = get_binary_classification_data
    model = LogisticRegression(epochs=100, learning_rate=0.01, checkpoint=10)
    model.fit(features, labels)

    # Both prediction entry points must yield one value per sample.
    expected_shape = (labels.shape[0],)

    internal_pred = model._predict(features)
    assert internal_pred.shape == expected_shape, "y_pred has wrong shape for binary problem"

    public_pred = model.predict(features)
    accuracy = model.score(features, labels)
    assert public_pred.shape == expected_shape, "y_pred has wrong shape for binary problem"
    assert accuracy > 0.3, "Accuracy below 0.3"
    assert accuracy < 1, "Accuracy is greater than or equal to 1"
def test_logistic_regression_history_w_early_stop(self, get_binary_classification_data):
    """Verify the training history recorded when an early-stop callback is used.

    Checks that every epoch-level and batch-level log series has one entry
    per epoch / batch, that the last logged theta equals the estimator's
    final theta, and that training and validation cost are lower at the
    end than at the start.
    """
    X, y = get_binary_classification_data
    es = EarlyStopImprovement()
    clf = LogisticRegression(epochs=10, early_stop=es)
    clf.fit(X, y)

    history = clf.history
    epoch_log = history.epoch_log
    batch_log = history.batch_log

    # Test epoch history
    epoch_messages = {
        'epoch': "number of epochs in log doesn't match epochs",
        'learning_rate': "number of learning rates in log doesn't match epochs",
        'theta': "number of thetas in log doesn't match epochs",
        'train_cost': "number of train costs in log doesn't match epochs",
        'val_cost': "number of val costs in log doesn't match epochs",
        'train_score': "number of train score in log doesn't match epochs",
        'val_score': "number of val score in log doesn't match epochs",
    }
    for key, message in epoch_messages.items():
        assert history.total_epochs == len(epoch_log.get(key)), message

    # np.array_equal compares shape and values in one step; the previous
    # all(np.equal(...)) form broadcast mismatched shapes and raised an
    # ambiguous-truth error for multi-dimensional theta.
    assert np.array_equal(clf.theta, epoch_log.get('theta')[-1]), "Last theta in log doesn't equal final theta."

    train_costs = epoch_log.get('train_cost')
    val_costs = epoch_log.get('val_cost')
    assert train_costs[0] > train_costs[-1], "train_cost does not decrease"
    assert val_costs[0] > val_costs[-1], "val_cost does not decrease"
    # NOTE(review): the analogous first-vs-last checks on train_score and
    # val_score were disabled in the original test; presumably scores are
    # not expected to decrease - confirm before re-enabling.

    # Test batch history
    batch_messages = {
        'batch': "number of batches in log doesn't match total batches",
        'batch_size': "number of batch sizes in log doesn't match total batches",
        'theta': "number of thetas in log doesn't match total batches",
        'train_cost': "number of train_costs in log doesn't match total batches",
    }
    for key, message in batch_messages.items():
        assert history.total_batches == len(batch_log.get(key)), message
def test_logistic_regression_name(self, get_binary_classification_data):
    """Check the estimator's ``name`` for default, size-1 and size-32 batches."""
    features, labels = get_binary_classification_data
    # (extra constructor kwargs, name expected after fitting)
    cases = (
        ({}, 'Logistic Regression with Batch Gradient Descent'),
        ({'batch_size': 1}, 'Logistic Regression with Stochastic Gradient Descent'),
        ({'batch_size': 32}, 'Logistic Regression with Minibatch Gradient Descent'),
    )
    for batch_options, expected_name in cases:
        model = LogisticRegression(epochs=50, **batch_options)
        model.fit(features, labels)
        assert model.name == expected_name
def test_logistic_regression_early_stop_from_estimator_val_score(self, get_binary_classification_data):
    """With early stopping, a validation split and a metric, expect 'val_score' to be monitored."""
    features, labels = get_binary_classification_data
    estimator = LogisticRegression(epochs=5000, early_stop=True,
                                   val_size=0.3, metric='accuracy')
    estimator.fit(features, labels)
    monitored = estimator.convergence_monitor.monitor
    assert monitored == 'val_score', "Estimator is not sending correct metric"
def test_logistic_regression_early_stop_from_estimator_train_cost(self, get_binary_classification_data):
    """With early_stop=False and no metric, expect 'train_cost' to be monitored."""
    features, labels = get_binary_classification_data
    estimator = LogisticRegression(epochs=5000, early_stop=False,
                                   val_size=0.3, metric=None)
    estimator.fit(features, labels)
    monitored = estimator.convergence_monitor.monitor
    assert monitored == 'train_cost', "Estimator is not sending correct metric"
# DATA # # ---------------------------------------------------------------------------- # #%% # Data X, y = datasets.load_breast_cancer(return_X_y=True) # Data transformation scaler = StandardScaler() scaler.fit(X) X_scaled = scaler.transform(X) X_train, X_test, y_train, y_test = train_test_split(X_scaled, y) # ---------------------------------------------------------------------------- # # LOGISTIC REGRESSION # # ---------------------------------------------------------------------------- # #%% # Linear Regression clf = LogisticRegression(epochs=500, learning_rate=0.05, metric='accuracy') clf.fit(X_train, y_train) history = clf.history costs = history.epoch_log['train_cost'] # ---------------------------------------------------------------------------- # # LEARNING CURVE # # ---------------------------------------------------------------------------- # #%% # Learning Curve data = go.Scatter(x=np.linspace(0, len(costs), len(costs)), y=costs, mode='lines', line=dict(color='steelblue')) layout = go.Layout(title='Wisconsin Breast Cancer Dataset Learning Curve', xaxis_title="Epochs", yaxis_title='Cross-Entropy Cost',