import os

import numpy as np
import pandas as pd

# NOTE: the import paths below are assumptions inferred from how these tests
# use the healthcareai package; adjust them to the actual project layout.
# Both the `helpers` and `test_helpers` aliases appear in the tests (they were
# written across several test modules) and are assumed to point at the same
# shared test helpers module.
import healthcareai.common.model_eval as hcai_eval
import healthcareai.tests.helpers as helpers
import healthcareai.tests.helpers as test_helpers
from healthcareai.trained_models.trained_supervised_model import TrainedSupervisedModel


def test_linear_regression(self):
    trained_linear_model = self.regression_trainer.linear_regression()

    self.assertIsInstance(trained_linear_model, TrainedSupervisedModel)
    result = trained_linear_model.metrics
    helpers.assertBetween(self, 500, 750, result['mean_squared_error'])
    helpers.assertBetween(self, 18, 29, result['mean_absolute_error'])

def test_knn(self):
    trained_knn = self.classification_trainer.knn()
    result = trained_knn.metrics

    self.assertIsInstance(trained_knn, TrainedSupervisedModel)
    helpers.assertBetween(self, 0.5, 0.95, result['roc_auc'])
    helpers.assertBetween(self, 0.79, 0.95, result['accuracy'])

def test_random_forest_classification(self):
    # Force plot to save to prevent blocking
    trained_random_forest = self.classification_trainer.random_forest_classification(save_plot=True)
    result = trained_random_forest.metrics

    self.assertIsInstance(trained_random_forest, TrainedSupervisedModel)
    helpers.assertBetween(self, 0.65, 0.95, result['roc_auc'])
    helpers.assertBetween(self, 0.8, 0.95, result['accuracy'])

def test_logistic_regression(self):
    trained_lr = self.classification_trainer.logistic_regression()

    self.assertIsInstance(trained_lr, TrainedSupervisedModel)
    result = trained_lr.metrics
    helpers.assertBetween(self, 0.52, 0.95, result['roc_auc'])
    helpers.assertBetween(self, 0.6, 0.95, result['accuracy'])

def test_ensemble_classification(self):
    trained_ensemble = self.classification_trainer.ensemble()

    self.assertIsInstance(trained_ensemble, TrainedSupervisedModel)
    result = trained_ensemble.metrics
    helpers.assertBetween(self, 0.6, 0.97, result['roc_auc'])
    helpers.assertBetween(self, 0.6, 0.97, result['accuracy'])

def test_random_forest_regression(self):
    trained_rf_regressor = self.regression_trainer.random_forest_regression()

    self.assertIsInstance(trained_rf_regressor, TrainedSupervisedModel)
    result = trained_rf_regressor.metrics
    helpers.assertBetween(self, 350, 750, result['mean_squared_error'])
    helpers.assertBetween(self, 10, 25, result['mean_absolute_error'])

def test_linear_regression(self):
    trained_linear_model = self.regression_trainer.linear_regression()

    self.assertIsInstance(trained_linear_model, TrainedSupervisedModel)
    result = trained_linear_model.metrics
    helpers.assertBetween(self, 450, 800, result['mean_squared_error'])
    helpers.assertBetween(self, 16, 29, result['mean_absolute_error'])

def test_ensemble_classification(self):
    trained_ensemble = self.classification_trainer.ensemble()

    self.assertIsInstance(trained_ensemble, TrainedSupervisedModel)
    result = trained_ensemble.metrics
    helpers.assertBetween(self, 0.6, 0.95, result['roc_auc'])
    helpers.assertBetween(self, 0.6, 0.95, result['accuracy'])

def test_random_forest_regression(self):
    trained_rf_regressor = self.regression_trainer.random_forest_regression()

    self.assertIsInstance(trained_rf_regressor, TrainedSupervisedModel)
    result = trained_rf_regressor.metrics
    helpers.assertBetween(self, 400, 700, result['mean_squared_error'])
    helpers.assertBetween(self, 10, 20, result['mean_absolute_error'])

def test_knn(self):
    trained_knn = self.classification_trainer.knn()
    result = trained_knn.metrics

    self.assertIsInstance(trained_knn, TrainedSupervisedModel)
    helpers.assertBetween(self, 0.5, 0.85, result['roc_auc'])
    helpers.assertBetween(self, 0.79, 0.95, result['accuracy'])

def test_pr(self):
    df = pd.DataFrame({'a': np.repeat(np.arange(.1, 1.1, .1), 10)})
    b = np.repeat(0, 100)
    b[[56, 62, 63, 68, 74, 75, 76, 81, 82, 84, 85, 87, 88] + list(range(90, 100))] = 1
    df['b'] = b

    # PR_AUC
    out = hcai_eval.compute_pr(df['b'], df['a'])
    test_helpers.assertBetween(self, 0.8, 0.87, out['pr_auc'])
    self.assertAlmostEqual(round(out['best_precision'], 4), 0.8000)
    self.assertAlmostEqual(round(out['best_recall'], 4), 0.6957)

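# Not part of the original suite: a minimal cross-check of the expected
# 'pr_auc' bound above using scikit-learn, assuming compute_pr's 'pr_auc' is
# comparable to the trapezoidal area under the precision-recall curve. The
# test name is illustrative.
def test_pr_sklearn_cross_check(self):
    from sklearn.metrics import auc, precision_recall_curve

    # Same synthetic data as test_pr: 10 score deciles, 23 positives.
    scores = np.repeat(np.arange(.1, 1.1, .1), 10)
    labels = np.repeat(0, 100)
    labels[[56, 62, 63, 68, 74, 75, 76, 81, 82, 84, 85, 87, 88] + list(range(90, 100))] = 1

    precision, recall, _ = precision_recall_curve(labels, scores)
    # auc() accepts the monotonically decreasing recall array that
    # precision_recall_curve returns.
    test_helpers.assertBetween(self, 0.8, 0.87, auc(recall, precision))
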
def test_random_forest_classification(self):
    # Force plot to save to prevent matplotlib blocking during testing
    trained_random_forest = self.classification_trainer.random_forest_classification(save_plot=True)
    result = trained_random_forest.metrics

    self.assertIsInstance(trained_random_forest, TrainedSupervisedModel)
    helpers.assertBetween(self, 0.65, 0.95, result['roc_auc'])
    helpers.assertBetween(self, 0.8, 0.95, result['accuracy'])

    # Clean up saved plot (see note above)
    try:
        os.remove('FeatureImportances.png')
    except OSError:
        pass

def test_logistic_regression_no_tuning(self):
    self.assertIsInstance(self.lr, TrainedSupervisedModel)
    test_helpers.assertBetween(self, 0.5, 0.8, self.lr.metrics['roc_auc'])

def test_random_forest_tuning(self):
    rf = self.trainer.random_forest_classifier(randomized_search=True)
    self.assertIsInstance(rf, TrainedSupervisedModel)
    test_helpers.assertBetween(self, 0.7, 0.97, rf.metrics['roc_auc'])

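# For context only. The assertBetween helper used throughout is assumed to
# look roughly like this sketch; the real implementation lives in the
# project's shared test helpers module.
def assertBetween(test_case, minimum, maximum, value):
    """Assert that minimum <= value <= maximum."""
    test_case.assertGreaterEqual(value, minimum)
    test_case.assertLessEqual(value, maximum)


# The trainer fixtures (self.classification_trainer, self.regression_trainer)
# are built outside this excerpt. A hypothetical setUpClass, assuming
# healthcareai's SupervisedModelTrainer and its bundled diabetes sample data
# (the predicted/grain column names are assumptions), might look like:
@classmethod
def setUpClass(cls):
    import healthcareai

    df = healthcareai.load_diabetes()
    cls.classification_trainer = healthcareai.SupervisedModelTrainer(
        dataframe=df,
        predicted_column='ThirtyDayReadmitFLG',
        model_type='classification',
        grain_column='PatientEncounterID',
        impute=True)
    cls.regression_trainer = healthcareai.SupervisedModelTrainer(
        dataframe=df,
        predicted_column='SystolicBPNBR',
        model_type='regression',
        grain_column='PatientEncounterID',
        impute=True)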