def lasso_regression(self, scoring_metric='neg_mean_squared_error', hyperparameter_grid=None,
                     randomized_search=True, number_iteration_samples=2):
    """
    Train a Lasso regression model through a hyperparameter search.

    Thin wrapper: validates that the trainer is set up for regression, hands
    sklearn's ``Lasso`` plus the search settings to ``get_algorithm`` and wraps
    the result with ``self._create_trained_supervised_model``.

    Note:
        When ``hyperparameter_grid`` is None the built-in grid (two candidate
        values for ``fit_intercept``) is used AND ``number_iteration_samples``
        is reset to 2, whatever value the caller passed.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for regression.
        hyperparameter_grid (dict): Hyperparameters by name. None selects the
            built-in default grid.
        randomized_search (bool): True for randomized search (default).
        number_iteration_samples (int): Number of models to train during the
            randomized search. More may lead to a better model, but will take
            longer. Only honoured when a custom grid is supplied (see Note).

    Returns:
        TrainedSupervisedModel: result of ``self._create_trained_supervised_model``.
    """
    self.validate_regression('Lasso Regression')

    use_default_grid = hyperparameter_grid is None
    if use_default_grid:
        # Default grid and its matching sample count travel together.
        hyperparameter_grid = {"fit_intercept": [True, False]}
        number_iteration_samples = 2

    search = get_algorithm(
        Lasso,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
    )
    return self._create_trained_supervised_model(search)
def logistic_regression(self, scoring_metric='roc_auc', hyperparameter_grid=None,
                        randomized_search=True, number_iteration_samples=10):
    """
    Train a logistic regression classifier through a hyperparameter search.

    Thin wrapper: validates that the trainer is set up for classification,
    hands sklearn's ``LogisticRegression`` plus the search settings to
    ``get_algorithm`` and wraps the result with
    ``self._create_trained_supervised_model``.

    Note:
        When ``hyperparameter_grid`` is None the built-in grid (five values of
        ``C`` crossed with two ``class_weight`` options) is used AND
        ``number_iteration_samples`` is reset to 10, whatever value the caller
        passed.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for classification.
        hyperparameter_grid (dict): Hyperparameters by name. None selects the
            built-in default grid.
        randomized_search (bool): True for randomized search (default).
        number_iteration_samples (int): Number of models to train during the
            randomized search. More may lead to a better model, but will take
            longer. Only honoured when a custom grid is supplied (see Note).

    Returns:
        TrainedSupervisedModel: result of ``self._create_trained_supervised_model``.
    """
    self.validate_classification('Logistic Regression')

    if hyperparameter_grid is None:
        # Default grid and its matching sample count travel together.
        hyperparameter_grid = {
            'C': [0.01, 0.1, 1, 10, 100],
            'class_weight': [None, 'balanced'],
        }
        number_iteration_samples = 10

    search = get_algorithm(
        LogisticRegression,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
    )
    return self._create_trained_supervised_model(search)
def knn(self, scoring_metric='roc_auc', hyperparameter_grid=None, randomized_search=True,
        number_iteration_samples=10):
    """
    Train a k-nearest-neighbours classifier through a hyperparameter search.

    Thin wrapper: validates that the trainer is set up for classification,
    hands sklearn's ``KNeighborsClassifier`` plus the search settings to
    ``get_algorithm`` and wraps the result with
    ``self._create_trained_supervised_model``.

    Note:
        When ``hyperparameter_grid`` is None the built-in grid
        (``n_neighbors`` from 5 through 25, ``weights`` uniform/distance) is
        used, ``number_iteration_samples`` is reset to 10 whatever value the
        caller passed, and the grid is printed to stdout.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for classification.
        hyperparameter_grid (dict): Hyperparameters by name. None selects the
            built-in default grid.
        randomized_search (bool): True for randomized search (default).
        number_iteration_samples (int): Number of models to train during the
            randomized search. More may lead to a better model, but will take
            longer. Only honoured when a custom grid is supplied (see Note).

    Returns:
        TrainedSupervisedModel: result of ``self._create_trained_supervised_model``.
    """
    self.validate_classification('KNN')

    if hyperparameter_grid is None:
        k_candidates = list(range(5, 26))
        hyperparameter_grid = {
            'n_neighbors': k_candidates,
            'weights': ['uniform', 'distance'],
        }
        number_iteration_samples = 10
        # NOTE(review): source indentation was lost; this print is assumed to
        # belong to the default-grid branch -- confirm against the original file.
        print('KNN Grid: {}'.format(hyperparameter_grid))

    search = get_algorithm(
        KNeighborsClassifier,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
    )
    return self._create_trained_supervised_model(search)
def lasso_regression(self, scoring_metric='neg_mean_squared_error', hyperparameter_grid=None,
                     randomized_search=True, number_iteration_samples=2):
    """
    Fit a Lasso regressor using a (by default randomized) hyperparameter search.

    Steps: check the trainer is configured for regression, fall back to the
    built-in grid if none was given, build the search via ``get_algorithm``
    with sklearn's ``Lasso``, then wrap it with
    ``self._create_trained_supervised_model``.

    Note:
        Falling back to the built-in grid (``fit_intercept`` True/False) also
        overwrites ``number_iteration_samples`` with 2; the caller's value is
        used only together with a custom ``hyperparameter_grid``.

    Args:
        scoring_metric (str): Any sklearn scoring metric appropriate for regression.
        hyperparameter_grid (dict): Hyperparameters by name, or None for the
            built-in default.
        randomized_search (bool): True for randomized search (default).
        number_iteration_samples (int): Number of models to train during the
            randomized search for exploring the hyperparameter space. More may
            lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel: result of ``self._create_trained_supervised_model``.
    """
    self.validate_regression('Lasso Regression')

    if hyperparameter_grid is None:
        # Built-in grid is paired with a fixed sample count.
        hyperparameter_grid, number_iteration_samples = {"fit_intercept": [True, False]}, 2

    lasso_search = get_algorithm(
        Lasso,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
    )
    fitted_model = self._create_trained_supervised_model(lasso_search)
    return fitted_model
def random_forest_regressor(self, trees=200, scoring_metric='neg_mean_squared_error',
                            hyperparameter_grid=None, randomized_search=True,
                            number_iteration_samples=5):
    """
    Train a random forest regressor through a hyperparameter search.

    Thin wrapper: validates that the trainer is set up for regression, hands
    sklearn's ``RandomForestRegressor`` plus the search settings to
    ``get_algorithm`` (forwarding ``trees`` as ``n_estimators``) and wraps the
    result with ``self._create_trained_supervised_model``.

    Note:
        When ``hyperparameter_grid`` is None a default grid is built from
        ``n_estimators`` in [10, 50, 200] and the ``max_features`` candidates
        returned by ``hcai_helpers.calculate_random_forest_mtry_hyperparameter``
        for the column count of ``self.X_test`` and ``self.model_type``; in that
        case ``number_iteration_samples`` is reset to 5, whatever value the
        caller passed.

    Args:
        trees (int): Number of trees to use if not performing a randomized grid search.
        scoring_metric (str): Any sklearn scoring metric appropriate for regression.
        hyperparameter_grid (dict): Hyperparameters by name. None selects the
            built-in default grid.
        randomized_search (bool): True for randomized search (default).
        number_iteration_samples (int): Number of models to train during the
            randomized search. More may lead to a better model, but will take
            longer. Only honoured when a custom grid is supplied (see Note).

    Returns:
        TrainedSupervisedModel: result of ``self._create_trained_supervised_model``.
    """
    self.validate_regression('Random Forest Regressor')

    if hyperparameter_grid is None:
        mtry_candidates = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
            len(self.X_test.columns),
            self.model_type,
        )
        hyperparameter_grid = {
            'n_estimators': [10, 50, 200],
            'max_features': mtry_candidates,
        }
        number_iteration_samples = 5

    search = get_algorithm(
        RandomForestRegressor,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
        n_estimators=trees,
    )
    return self._create_trained_supervised_model(search)
def random_forest_regressor(self, trees=200, scoring_metric='neg_mean_squared_error',
                            hyperparameter_grid=None, randomized_search=True,
                            number_iteration_samples=5):
    """
    Fit a random forest regressor using a (by default randomized) hyperparameter search.

    Steps: check the trainer is configured for regression, fall back to a
    built-in grid if none was given, build the search via ``get_algorithm``
    with sklearn's ``RandomForestRegressor`` (``trees`` is forwarded as
    ``n_estimators``), then wrap it with
    ``self._create_trained_supervised_model``.

    Note:
        The built-in grid combines ``n_estimators`` in [10, 50, 200] with the
        ``max_features`` value(s) computed by
        ``hcai_helpers.calculate_random_forest_mtry_hyperparameter`` from the
        number of columns in ``self.X_test`` and ``self.model_type``. Falling
        back to it also overwrites ``number_iteration_samples`` with 5; the
        caller's value is used only together with a custom grid.

    Args:
        trees (int): Number of trees to use if not performing a randomized grid search.
        scoring_metric (str): Any sklearn scoring metric appropriate for regression.
        hyperparameter_grid (dict): Hyperparameters by name, or None for the
            built-in default.
        randomized_search (bool): True for randomized search (default).
        number_iteration_samples (int): Number of models to train during the
            randomized search for exploring the hyperparameter space. More may
            lead to a better model, but will take longer.

    Returns:
        TrainedSupervisedModel: result of ``self._create_trained_supervised_model``.
    """
    self.validate_regression('Random Forest Regressor')

    if hyperparameter_grid is None:
        # Built-in grid is paired with a fixed sample count.
        max_features_options = hcai_helpers.calculate_random_forest_mtry_hyperparameter(
            len(self.X_test.columns), self.model_type)
        hyperparameter_grid = {'n_estimators': [10, 50, 200], 'max_features': max_features_options}
        number_iteration_samples = 5

    forest_search = get_algorithm(
        RandomForestRegressor,
        scoring_metric,
        hyperparameter_grid,
        randomized_search,
        number_iteration_samples=number_iteration_samples,
        n_estimators=trees,
    )
    fitted_model = self._create_trained_supervised_model(forest_search)
    return fitted_model