Example #1
# train_test_split comes from scikit-learn; get_features, get_model, grid_search,
# random_search and bayesian_tpe are assumed to be project helpers defined elsewhere.
from sklearn.model_selection import train_test_split


def get_trained_model(dataset,
                      label,
                      model_name,
                      task,
                      method_name='standard',
                      max_evals=100,
                      test_size=0.3,
                      random_state=1):
    '''
    Train the model with the given data and HPO method. Returns the trained model.

            Parameters:
                    dataset (dataframe) : data to be used for training the model
                    label (string) : target column of the dataframe
                    model_name (string) : name of the model on which the data is to be trained
                    task (string) : type of task
                    method_name (string) : name of the hyperparameter optimization method to be used while training
                    max_evals (int) : maximum number of evaluations for the hyperparameter search
                    test_size (float) : fraction of the data to be used for testing
                    random_state (int) : random state to be used for the train/test split

            Returns:
                    trained_model (model object) : the trained model (hyperparameters optimized unless method_name is 'standard')
    '''
    features = get_features(dataset, label)
    X, Y = dataset[features], dataset[label]
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)
    model = get_model(model_name)
    if method_name == 'standard':
        model = model()
        trained_model = model.fit(X_train, Y_train)
        print('Standard Model without HPO')
        print('Model Trained')
        return trained_model
    elif method_name == 'grid_search':
        trained_model = grid_search(model, X_train, Y_train)
    elif method_name == 'random_search':
        trained_model = random_search(model, X_train, Y_train)
    # elif method_name == 'bayesian_gp':
    #     trained_model = bayesian_gp(model, X_train, Y_train)
    elif method_name == 'bayesian_tpe':
        trained_model = bayesian_tpe(model,
                                     X_train,
                                     X_test,
                                     Y_train,
                                     Y_test,
                                     task,
                                     max_evals=max_evals)
    else:
        raise Exception('No hpo method named {}'.format(method_name))

    print('Hyperparameters Optimized')
    print('Model Trained')
    return trained_model
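
A minimal usage sketch for get_trained_model; the dataframe, the 'target' column and the 'classification' task string are illustrative assumptions, not values taken from the project:

# Hypothetical usage only: 'data.csv', 'target' and 'classification' are made up.
import pandas as pd

df = pd.read_csv('data.csv')          # any dataframe containing a 'target' column
baseline = get_trained_model(df, 'target', 'RandomForestClassifier',
                             'classification')                     # no HPO
tuned = get_trained_model(df, 'target', 'RandomForestClassifier',
                          'classification', method_name='bayesian_tpe',
                          max_evals=50)                             # Bayesian TPE HPO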


#---------------------------------------------------------------------------------------------------------------------#
Example #2
    def add_models(self, base_layer_models):
        '''
        Adds the given models to the base layer of the ensemble.

                Parameters:
                        self (object ref)
                        base_layer_models (list) : list of model names to be added to the base layer of the ensemble
        '''
        self.models.extend([get_model(model) for model in base_layer_models])
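
A usage sketch for add_models; the class name StackingEnsemble is an assumption (the excerpt does not show the class header), and the model names must be ones the project's get_model helper can resolve:

# Hypothetical usage: grow the base layer after construction.
ensemble = StackingEnsemble(base_layer_models=['LogisticRegression'])   # class name assumed
ensemble.add_models(['DecisionTreeClassifier', 'AdaBoostClassifier'])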
Example #3
    def __init__(self,
                 base_layer_models=None,
                 meta_model='RandomForestClassifier',
                 n_splits=5,
                 optimize=True,
                 max_evals=100):
        '''
        Initializes the object with the given arguments.

                Parameters:
                        self (object ref)
                        base_layer_models (list) : list of model names to be used at the base layer of the ensemble
                        meta_model (string) : name of the model to be used at the meta layer of the ensemble
                        n_splits (int) : number of splits to be made for cross validation
                        optimize (boolean) : whether to perform hyperparameter optimization
                        max_evals (int) : maximum number of evaluations for the hyperparameter search
        '''
        if base_layer_models is None:
            base_layer_models = [
                'LogisticRegression', 'DecisionTreeClassifier',
                'RandomForestClassifier', 'GradientBoostingClassifier',
                'ExtraTreesClassifier', 'AdaBoostClassifier'
            ]
            #base_layer_models = ['LogisticRegression', 'DecisionTreeClassifier']

        self.models = [get_model(model) for model in base_layer_models]

        if meta_model is None:
            self.meta_model = None
        elif meta_model in ['AdaBoostClassifier', 'BaggingClassifier']:
            self.meta_model = get_model(meta_model)(
                base_estimator=get_model('RandomForestClassifier')())
        else:
            self.meta_model = get_model(meta_model)()

        self.n_splits = n_splits
        self.trained_models = []
        self.optimize = optimize
        self.max_evals = max_evals
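
A constructor sketch for the ensemble class above; the name StackingEnsemble is again an assumption, and the keyword values simply mirror the parameters documented in the docstring:

# Default construction: six sklearn classifiers at the base layer, a
# RandomForestClassifier at the meta layer, 5-fold CV, optimization enabled.
default_ensemble = StackingEnsemble()

# Explicit construction with a smaller base layer and no optimization;
# AdaBoostClassifier is wrapped around a RandomForestClassifier base estimator.
small_ensemble = StackingEnsemble(
    base_layer_models=['LogisticRegression', 'DecisionTreeClassifier'],
    meta_model='AdaBoostClassifier',
    n_splits=3,
    optimize=False)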