def test_less_than_three_columns_raises_error_with_correct_message(self):
    """Fewer than three columns must raise HealthcareAIError with a clear message."""
    # assertRaises verifies the exception type AND fails with a descriptive
    # message if nothing is raised — replaces the try/self.fail() idiom.
    with self.assertRaises(HealthcareAIError) as raised:
        calculate_random_forest_mtry_hyperparameter(2, 'classification')
    self.assertEqual(
        raised.exception.message,
        'You need more than two columns to tune hyperparameters.')
def test_non_integer_columns_raises_error(self):
    """A non-integer column count must raise HealthcareAIError with a clear message."""
    # assertRaises verifies the exception type AND fails with a descriptive
    # message if nothing is raised — replaces the try/self.fail() idiom.
    with self.assertRaises(HealthcareAIError) as raised:
        calculate_random_forest_mtry_hyperparameter('regression_metrics', 'classification')
    self.assertEqual(
        raised.exception.message,
        'The number_of_columns must be an integer')
def test_bad_model_type_raises_error_with_correct_message(self):
    """An unknown model type must raise HealthcareAIError with a clear message."""
    # assertRaises verifies the exception type AND fails with a descriptive
    # message if nothing is raised — replaces the try/self.fail() idiom.
    with self.assertRaises(HealthcareAIError) as raised:
        calculate_random_forest_mtry_hyperparameter(3, 'regression_metrics')
    self.assertEqual(
        raised.exception.message,
        'Please specify model type of \'regression\' or \'classification\'')
def test_negative_columns_raises_error_with_correct_message(self):
    """A negative column count must raise HealthcareAIError with a clear message."""
    # assertRaises verifies the exception type AND fails with a descriptive
    # message if nothing is raised — replaces the try/self.fail() idiom.
    with self.assertRaises(HealthcareAIError) as raised:
        calculate_random_forest_mtry_hyperparameter(-10, 'classification')
    self.assertEqual(
        raised.exception.message,
        'You need more than two columns to tune hyperparameters.')
def random_forest_regressor(self, trees=200, scoring_metric='neg_mean_squared_error', hyperparameter_grid=None, randomized_search=True, number_iteration_samples=5):
    """
    A light wrapper for Sklearn's random forest regressor that performs randomized search over an overridable default
    hyperparameter grid.

    Args:
        trees (int): number of trees to use if not performing a randomized grid search
        scoring_metric (str): Any sklearn scoring metric appropriate for regression
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the randomized search for exploring the
            hyperparameter space. More may lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    # Raises if this trainer is not configured for regression.
    self.validate_regression('Random Forest Regressor')
    if hyperparameter_grid is None:
        # Build the default grid: candidate max_features values derived
        # from the number of feature columns and the model type.
        max_features = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
            len(self.X_test.columns), self.model_type)
        hyperparameter_grid = {
            'n_estimators': [10, 50, 200],
            'max_features': max_features
        }
        # NOTE(review): this silently overrides the caller-supplied
        # number_iteration_samples whenever the default grid is used.
        # Possibly an intentional clamp (the default grid has only
        # 3 x 3 = 9 combinations) — confirm intent before changing.
        number_iteration_samples = 5
    # Delegate estimator construction / (randomized) search setup.
    algorithm = get_algorithm(
        RandomForestRegressor, scoring_metric, hyperparameter_grid,
        randomized_search, number_iteration_samples=number_iteration_samples,
        n_estimators=trees)
    trained_supervised_model = self._create_trained_supervised_model(
        algorithm)
    return trained_supervised_model
def random_forest_regressor(self, trees=200, scoring_metric='neg_mean_squared_error', hyperparameter_grid=None, randomized_search=True, number_iteration_samples=5):
    """
    Train a scikit-learn random forest regressor, optionally via randomized
    search over a default (but overridable) hyperparameter grid.

    Args:
        trees (int): number of trees to use if not performing a randomized grid search
        scoring_metric (str): Any sklearn scoring metric appropriate for regression
        hyperparameter_grid (dict): hyperparameters by name
        randomized_search (bool): True for randomized search (default)
        number_iteration_samples (int): Number of models to train during the randomized search for exploring the
            hyperparameter space. More may lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel:
    """
    # Guard: this trainer must be configured for regression.
    self.validate_regression('Random Forest Regressor')

    if hyperparameter_grid is None:
        # Default grid: mtry candidates are derived from the feature count.
        mtry_candidates = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
            len(self.X_test.columns), self.model_type)
        hyperparameter_grid = {
            'n_estimators': [10, 50, 200],
            'max_features': mtry_candidates,
        }
        # NOTE(review): reproduces the original behavior — the sample count
        # is pinned to 5 whenever the default grid is used, regardless of the
        # caller's argument; confirm whether this clamp is intentional.
        number_iteration_samples = 5

    algorithm = get_algorithm(
        RandomForestRegressor,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
        n_estimators=trees)

    return self._create_trained_supervised_model(algorithm)
def test_one_hundred_columns_regression(self):
    """100 columns under regression yields mtry candidates [32, 33, 34]."""
    actual = calculate_random_forest_mtry_hyperparameter(100, 'regression')
    self.assertEqual(actual, [32, 33, 34])
def test_one_hundred_columns_classification(self):
    """100 columns under classification yields mtry candidates [9, 10, 11]."""
    expected = [9, 10, 11]
    actual = calculate_random_forest_mtry_hyperparameter(100, 'classification')
    self.assertEqual(actual, expected)
def test_ten_columns_regression(self):
    """10 columns under regression yields mtry candidates [2, 3, 4]."""
    actual = calculate_random_forest_mtry_hyperparameter(10, 'regression')
    self.assertEqual(actual, [2, 3, 4])
def test_three_columns_regression(self):
    """3 columns under regression yields mtry candidates [1, 2, 3]."""
    actual = calculate_random_forest_mtry_hyperparameter(3, 'regression')
    self.assertEqual(actual, [1, 2, 3])
def test_one_hundred_columns_classification(self):
    """100 columns under classification yields mtry candidates [9, 10, 11].

    NOTE(review): if this method shares a TestCase with an earlier
    test_one_hundred_columns_classification, the later definition shadows
    the earlier one — consider removing the duplicate.
    """
    result = calculate_random_forest_mtry_hyperparameter(100, 'classification')
    self.assertEqual(result, [9, 10, 11])