def objective03(automator, space):
        '''
        Objective function for Random Forest Regressor.

        Parameters:
            automator: object providing ``x_train``, ``y_train`` and
                ``score_metric``. ``num_cv_folds`` and ``repeats`` are used
                for cross validation when the object defines them.
            space: mapping of sampled parameters. Entries whose names are
                returned by ``get_keys`` are passed to the regressor; the
                'scaler' and 'k_best' entries configure preprocessing.

        Returns:
            Tuple ``(score, algo)``: the negated mean cross-validation score
            and the algorithm name 'RandomForestRegressor'.
        '''
        algo = 'RandomForestRegressor'
        X = automator.x_train
        Y = automator.y_train

        #Define the subset of dictionary keys that should get passed to the machine learning
        #algorithm.
        keys = get_keys(algo)
        subspace = {k: space[k] for k in set(space).intersection(keys)}
        model = RandomForestRegressor(**subspace)

        #Extract the remaining keys that are pertinent to data preprocessing.
        scaler = space.get('scaler')
        num_features = space.get('k_best')

        #Assemble a data pipeline with the extracted data preprocessing keys.
        pipeline = Pipeline([
            ('scaler', scaler),
            ('select_best', SelectKBest(k=num_features)),
            ('classifier', model),
        ])

        #Use the cross validation settings carried by the automator, like the other
        #objective functions. When the automator does not define them, fall back to the
        #values that used to be hard-coded here (10 folds, a single repeat).
        kfold = RepeatedKFold(n_splits=getattr(automator, 'num_cv_folds', 10),
                              n_repeats=getattr(automator, 'repeats', 1))

        #The mean score is negated before it is returned.
        scores = -cross_val_score(pipeline,
                                  X,
                                  Y,
                                  cv=kfold,
                                  scoring=automator.score_metric,
                                  verbose=False).mean()
        return scores, algo
    def objective04(automator, space):
        '''
        Objective function for Support Vector Regression. Note that this method uses a
        BaggingRegressor as a wrapper for SVR.  Support Vector Machine run time scales by O(N^3).
        Using bagged estimators breaks up the dataset into smaller samples so that runtime is
        manageable.

        Parameters:
            automator: object providing ``x_train``, ``y_train``, ``num_samples``,
                ``num_cv_folds``, ``repeats`` and ``score_metric``.
            space: mapping of sampled parameters. Entries whose names are returned by
                ``get_keys`` are passed to SVR; 'n_estimators', 'scaler' and 'k_best'
                configure the bagging wrapper and the preprocessing steps.

        Returns:
            Tuple ``(score, algo)``: the negated mean cross-validation score and the
            algorithm name 'SVR'.
        '''
        algo = 'SVR'
        X = automator.x_train
        Y = automator.y_train

        #Define the subset of dictionary keys that should get passed to the machine learning
        #algorithm.
        keys = get_keys(algo)
        subspace = {k: space[k] for k in set(space).intersection(keys)}

        #Build a model with the parameters from our Hyperopt search space.
        n_estimators = space.get('n_estimators')

        #Each estimator gets an equal share of the samples. The integer division yields 0
        #as soon as there are more estimators than samples, so keep the share at 1 or more.
        samples_per_estimator = max(1, automator.num_samples // n_estimators)
        model = BaggingRegressor(
            SVR(**subspace),
            max_samples=samples_per_estimator,
            n_estimators=n_estimators,
        )

        scaler = space.get('scaler')
        num_features = space.get('k_best')

        #Assemble a data pipeline with the extracted data preprocessing keys.
        pipeline = Pipeline([
            ('scaler', scaler),
            ('select_best', SelectKBest(k=num_features)),
            ('classifier', model),
        ])

        #perform cross validation and return the negated mean score.
        kfold = RepeatedKFold(n_splits=automator.num_cv_folds,
                              n_repeats=automator.repeats)
        scores = -cross_val_score(pipeline,
                                  X,
                                  Y,
                                  cv=kfold,
                                  scoring=automator.score_metric,
                                  verbose=False,
                                  n_jobs=-1).mean()
        return scores, algo
示例#3
0
def get_model(algo, space_dict):
    '''
    Build the estimator registered under the name ``algo``.

    Only the entries of ``space_dict`` whose names are returned by
    ``get_keys(algo)`` are forwarded to the estimator constructor as keyword
    arguments. A name that is missing from the lookup table below raises
    KeyError.
    '''
    accepted = get_keys(algo)

    #Keep only the parameters that belong to the chosen algorithm.
    params = {}
    for name in set(space_dict).intersection(accepted):
        params[name] = space_dict[name]

    #Lookup table from algorithm name to estimator class.
    registry = {
        'xgboost_classifier': XGBClassifier,
        'xgboost_regressor': XGBRegressor,
        'SGDClassifier': SGDClassifier,
        'SGDRegressor': SGDRegressor,
        'RandomForestClassifier': RandomForestClassifier,
        'RandomForestRegressor': RandomForestRegressor,
        'SVC': SVC,
        'SVR': SVR,
        'LogisticRegression': LogisticRegression,
        'KNeighborClassifier': KNeighborsClassifier,
        'KNeighborRegressor': KNeighborsRegressor,
        'GaussianNB': GaussianNB,
    }

    estimator_cls = registry[algo]
    return estimator_cls(**params)
    def objective01(automator, space):
        '''
        Objective function for XGBoost Classifier.

        Scores a scaler -> SelectKBest -> XGBClassifier pipeline with repeated k-fold
        cross validation and returns ``(score, algo)``, where score is the negated mean
        of the fold scores. If the scoring raises ValueError, the offending space is
        printed and ``(automator.best, algo)`` is returned instead.
        '''
        algo = 'xgboost_classifier'
        features = automator.x_train
        targets = automator.y_train

        #Only the entries named by get_keys are handed to the classifier.
        allowed = get_keys(algo)
        model_params = {}
        for name in set(space).intersection(allowed):
            model_params[name] = space[name]
        estimator = XGBClassifier(n_jobs=-1, **model_params)

        #The preprocessing steps are configured from the 'scaler' and 'k_best' entries.
        steps = [
            ('scaler', space.get('scaler')),
            ('select_best', SelectKBest(k=space.get('k_best'))),
            ('classifier', estimator),
        ]
        pipeline = Pipeline(steps)

        #Fold count and repeat count both come from the automator.
        splitter = RepeatedKFold(n_splits=automator.num_cv_folds,
                                 n_repeats=automator.repeats)

        try:
            fold_scores = cross_val_score(pipeline,
                                          features,
                                          targets,
                                          cv=splitter,
                                          scoring=automator.score_metric,
                                          verbose=False)
            loss = -fold_scores.mean()
        except ValueError:
            #Report the failing space and hand back the automator's current best value.
            print('An error occurred with the following space: ')
            print(space)
            return automator.best, algo

        return loss, algo
    def objective05(automator, space):
        '''
        Objective function for K-Nearest Neighbors Regressor.

        Scores a scaler -> SelectKBest -> KNeighborsRegressor pipeline with repeated
        k-fold cross validation and returns ``(score, algo)``, where score is the
        negated mean of the fold scores.
        '''
        algo = 'KNeighborRegressor'
        features = automator.x_train
        targets = automator.y_train

        #Only the entries named by get_keys are handed to the regressor.
        allowed = get_keys(algo)
        model_params = {}
        for name in set(space).intersection(allowed):
            model_params[name] = space[name]
        estimator = KNeighborsRegressor(n_jobs=-1, **model_params)

        #The preprocessing steps are configured from the 'scaler' and 'k_best' entries.
        scaling_step = ('scaler', space.get('scaler'))
        selection_step = ('select_best', SelectKBest(k=space.get('k_best')))
        model_step = ('classifier', estimator)
        pipeline = Pipeline([scaling_step, selection_step, model_step])

        #Fold count and repeat count both come from the automator.
        splitter = RepeatedKFold(n_splits=automator.num_cv_folds,
                                 n_repeats=automator.repeats)
        fold_scores = cross_val_score(pipeline,
                                      features,
                                      targets,
                                      cv=splitter,
                                      scoring=automator.score_metric,
                                      verbose=False)
        loss = -fold_scores.mean()
        return loss, algo
示例#6
0
 def test_get_keys(self):
     '''
     Check that get_keys returns something other than None for every name in
     ALGORITHM_KEYS, printing each result along the way.
     '''
     algorithm_names = ALGORITHM_KEYS.keys()
     for algorithm_name in algorithm_names:
         self.assertIsNotNone(get_keys(algorithm_name))
         print(get_keys(algorithm_name))