class ModelRepresentationBase(_AbstractModelRepresentation):
    """ class just to store the default HyperParameters """

    default_hyper = {
        "n_components": hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest like estimators
        "n_estimators": hp.HyperComposition([
            (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
            (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
        ]),
        "max_features": hp.HyperComposition([(0.25, ["sqrt", "auto"]),
                                             (0.75, hp.HyperRangeBetaFloat(start=0, end=1, alpha=3, beta=1))]),
        "max_depth": hp.HyperChoice([
            None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 30, 50, 100
        ]),
        "min_samples_split": hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear model
        "C": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CV
        "analyzer": hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        "random_state": [
            123
        ],  # So that for every model with a random_state attribute, it will be passed and fixed
        "drop_used_columns": [True],
        "drop_unused_columns": [True]
    }
    # This dictionary is used to specify the default hyper-parameters that are used during the random search phase
    # They will be used if :
    #  * the model has a parameter among that list
    #  * the parameter is not specified within the class (within 'custom_hyper')

    default_default_hyper = {
        "random_state": 123,
        "drop_used_columns": True,
        "drop_unused_columns": True
    }
    # This dictionary is used to specify the default hyper-parameters that are used during the default model phase
    # They will be used if :
    #  * the model has a parameter among that list
    #  * the default parameter is not specified within the class (within 'default_parameters')

    # No dependency on any other step by default
    depends_on = ()
def get_hyper_parameter(cls):
    """Build the hyper-parameter space for the text encoder.

    Handled specifically so that 'ngram_range' is forced to 1 whenever
    'analyzer' is 'word' : real n-grams are only drawn for the character
    analyzers. Two equally-weighted branches are composed:

    * word analyzer  : ngram_range fixed to 1
    * char analyzers : ngram_range drawn from a beta-shaped range in [1, 5]
    """
    # Parameters shared by both branches.
    shared = {
        "min_df": [1, 0.001, 0.01, 0.05],
        "max_df": [0.999, 0.99, 0.95],
        "tfidf": [True, False],
    }

    word_branch = hp.HyperCrossProduct(dict(
        ngram_range=1,
        analyzer="word",
        **shared,
    ))

    char_branch = hp.HyperCrossProduct(dict(
        # 1 = 1.5% ; 2 = 12% ; 3 = 25% ; 4 = 37% ; 5 = 24%
        ngram_range=hp.HyperRangeBetaInt(start=1, end=5, alpha=2, beta=1),
        analyzer=hp.HyperChoice(("char", "char_wb")),
        **shared,
    ))

    return hp.HyperComposition([(0.5, word_branch), (0.5, char_branch)])
class ModelRepresentationBase(_AbstractModelRepresentation):
    """ class just to store the default HyperParameters """

    # Default hyper-parameter search spaces, used during the random-search
    # phase when the model exposes the parameter and the concrete class does
    # not override it (within 'custom_hyper').
    default_hyper = {
        "n_components": hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest like estimators
        "n_estimators": hp.HyperComposition([
            (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
            (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
        ]),
        "max_features": hp.HyperComposition([(0.25, ["sqrt", "auto"]),
                                             (0.75, hp.HyperRangeBetaFloat(start=0, end=1, alpha=3, beta=1))]),
        "max_depth": hp.HyperChoice([
            None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 30, 50, 100
        ]),
        # scikit-learn requires min_samples_split >= 2, so the range starts
        # at 2 (a draw of 1 would make the estimator raise at fit time).
        "min_samples_split": hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear model
        "C": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CV
        "analyzer": hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        "random_state": [
            123
        ],  # So that for every model with a random_state attribute, it will be passed and fixed
        "columns_to_encode": ["--object--"]
    }
class BoxCoxTargetTransformer_TargetModifier(ModelRepresentationBase):
    """Register BoxCoxTargetTransformer as a target-transformer step (regression)."""

    klass = BoxCoxTargetTransformer
    category = StepCategories.TargetTransformer
    type_of_model = TypeOfProblem.REGRESSION

    # Not tied to a particular variable type.
    type_of_variable = None

    # The transformation depends on the target.
    use_y = True

    # 'll' : 10% of the time 0, otherwise a float drawn in [0, 2].
    custom_hyper = {"ll": hp.HyperComposition([(0.1, [0]), (0.9, hp.HyperRangeFloat(0, 2))])}
class TargetEncoderRegressor_CatEncoder(ModelRepresentationBase):
    """Register TargetEncoderRegressor as a category-encoder step (regression)."""

    klass = TargetEncoderRegressor
    category = StepCategories.CategoryEncoder
    type_of_model = TypeOfProblem.REGRESSION

    # Accepts both categorical and numerical variables.
    type_of_variable = (TypeOfVariables.CAT, TypeOfVariables.NUM)

    # The encoding depends on the target.
    use_y = True

    custom_hyper = {
        "cv": [None, 2, 5, 10],
        # Half the time no noise at all, otherwise a level drawn in [0, 1].
        "noise_level": hp.HyperComposition([(0.5, [None]), (0.5, hp.HyperRangeFloat(0, 1))]),
        "smoothing_min": hp.HyperRangeFloat(0, 10),
        "smoothing_value": hp.HyperRangeFloat(0, 10),
    }
def get_hyper_parameter(cls):
    """ specific function to handle dependency between hyper-parameters :
    bagging_fraction AND bagging_freq

    LightGBM only performs bagging when 'bagging_freq' > 0 and
    'bagging_fraction' < 1, and the 'rf' boosting type requires bagging.
    The space is therefore the union of two internally-consistent branches,
    drawn with equal probability :

    * no bagging : bagging_freq == 0, bagging_fraction == 1.0,
      and no random forest here : 'boosting_type' != 'rf'
    * bagging    : bagging_freq == 1, bagging_fraction < 1,
      'rf' allowed
    """

    def _branch(boosting_types, bagging_fraction, bagging_freq):
        # Every parameter except the bagging-related ones is shared by both
        # branches; key order is kept identical to the original definition.
        return hp.HyperCrossProduct({
            "boosting_type": boosting_types,
            "learning_rate": hp.HyperLogRangeFloat(0.0001, 0.1),
            "max_depth": hp.HyperChoice([
                -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 30, 50, 100
            ]),
            "n_estimators": hp.HyperComposition([
                (0.50, hp.HyperRangeInt(start=25, end=175, step=25)),
                (0.25, hp.HyperRangeInt(start=200, end=900, step=100)),
                (0.25, hp.HyperRangeInt(start=1000, end=10000, step=100)),
            ]),
            "colsample_bytree": hp.HyperRangeBetaFloat(start=0.1, end=1, alpha=3, beta=1),  # Mean = 0.75
            "min_child_samples": hp.HyperRangeInt(2, 50),
            "num_leaves": hp.HyperRangeInt(10, 200),
            "bagging_fraction": bagging_fraction,
            "bagging_freq": bagging_freq,
            "n_jobs": [1],
        })

    res = hp.HyperComposition([
        # No bagging : freq fixed to 0, fraction fixed to 1.0, no 'rf'.
        (0.5, _branch(["gbdt", "dart"], [1.0], [0])),
        # Bagging : freq fixed to 1, fraction drawn < 1 (beta-shaped, mean 0.75).
        (0.5, _branch(
            ["rf", "gbdt", "dart"],
            hp.HyperRangeBetaFloat(start=0.1, end=1, alpha=3, beta=1),
            [1],
        )),
    ])
    return res