    @classmethod
    def get_hyper_parameter(cls):
        ### Specific function to handle the fact that I don't want ngram_range != 1 if analyzer == "word" ###
        res = hp.HyperComposition(
            [
                (
                    0.5,
                    hp.HyperCrossProduct(
                        {
                            "ngram_range": 1,
                            "analyzer": "word",
                            "min_df": [1, 0.001, 0.01, 0.05],
                            "max_df": [0.999, 0.99, 0.95],
                            "tfidf": [True, False],
                        }
                    ),
                ),
                (
                    0.5,
                    hp.HyperCrossProduct(
                        {
                            "ngram_range": hp.HyperRangeBetaInt(start=1, end=5, alpha=2, beta=1),
                            # approximate draw probabilities: 1 -> 1.5% ; 2 -> 12% ; 3 -> 25% ; 4 -> 37% ; 5 -> 24%
                            "analyzer": hp.HyperChoice(("char", "char_wb")),
                            "min_df": [1, 0.001, 0.01, 0.05],
                            "max_df": [0.999, 0.99, 0.95],
                            "tfidf": [True, False],
                        }
                    ),
                ),
            ]
        )
        return res
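# The composition above splits the search space 50/50 between word-level vectorization
# (where ngram_range is pinned to 1) and character-level vectorization (where longer
# n-grams are allowed). The snippet below is a minimal, self-contained sketch of how
# such a weighted composition can be sampled; the names (sample_composition,
# _example_branches) are illustrative assumptions and it does not use aikit's actual
# hp.* classes.

import random


def sample_composition(branches):
    """Pick one branch with probability proportional to its weight, then draw each
    parameter of that branch: a callable is drawn by calling it, a list/tuple by
    uniform choice, anything else is kept as a fixed value."""
    weights = [w for w, _ in branches]
    _, params = random.choices(branches, weights=weights, k=1)[0]
    drawn = {}
    for name, spec in params.items():
        if callable(spec):
            drawn[name] = spec()
        elif isinstance(spec, (list, tuple)):
            drawn[name] = random.choice(spec)
        else:
            drawn[name] = spec
    return drawn


# Example: the 'word' branch always yields ngram_range == 1, while the 'char' branch
# draws it from a (here simplified) integer range.
_example_branches = [
    (0.5, {"ngram_range": 1, "analyzer": "word", "tfidf": [True, False]}),
    (0.5, {"ngram_range": lambda: random.randint(1, 5), "analyzer": ["char", "char_wb"], "tfidf": [True, False]}),
]
# sample_composition(_example_branches) -> e.g. {"ngram_range": 3, "analyzer": "char_wb", "tfidf": True}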
class ModelRepresentationBase(_AbstractModelRepresentation):
    """Class just to store the default HyperParameters."""

    default_hyper = {
        "n_components": hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest-like estimators
        "n_estimators": hp.HyperComposition(
            [
                (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
                (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
            ]
        ),
        "max_features": hp.HyperComposition(
            [
                (0.25, ["sqrt", "auto"]),
                (0.75, hp.HyperRangeBetaFloat(start=0, end=1, alpha=3, beta=1)),
            ]
        ),
        "max_depth": hp.HyperChoice([None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25, 30, 50, 100]),
        "min_samples_split": hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear models
        "C": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CountVectorizer / text vectorizers
        "analyzer": hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        "random_state": [123],  # So that for every model with a 'random_state' attribute, it is passed and fixed
        "drop_used_columns": [True],
        "drop_unused_columns": [True],
    }
    # This dictionary specifies the default hyper-parameters used during the random-search phase.
    # They will be used if:
    #  * the model has a parameter among that list
    #  * the parameter is not specified within the class (within 'custom_hyper')

    default_default_hyper = {
        "random_state": 123,
        "drop_used_columns": True,
        "drop_unused_columns": True,
    }
    # This dictionary specifies the default hyper-parameters used during the default-model phase.
    # They will be used if:
    #  * the model has a parameter among that list
    #  * the default parameter is not specified within the class (within 'default_parameters')

    depends_on = ()
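# The two comment blocks above describe a fallback mechanism: for a given model, a
# hyper-parameter distribution is taken from the subclass's 'custom_hyper' if present,
# otherwise from 'default_hyper', and only for parameters the model actually accepts.
# The helper below is a self-contained sketch of that resolution logic under those
# assumptions; the name resolve_hyper_space is hypothetical, not aikit's real API.

import inspect


def resolve_hyper_space(model_class, custom_hyper, default_hyper):
    """Return {parameter: distribution} for every parameter of model_class.__init__,
    taking the distribution from custom_hyper first and default_hyper as a fallback."""
    accepted = set(inspect.signature(model_class.__init__).parameters) - {"self"}
    space = {}
    for param in accepted:
        if param in custom_hyper:
            space[param] = custom_hyper[param]
        elif param in default_hyper:
            space[param] = default_hyper[param]
    return space


# Example (assuming scikit-learn is installed): for RandomForestClassifier this would
# pick up 'n_estimators', 'max_features', 'max_depth', 'min_samples_split' and
# 'random_state' from default_hyper, while any key listed in custom_hyper would take
# precedence.
# from sklearn.ensemble import RandomForestClassifier
# space = resolve_hyper_space(RandomForestClassifier, {}, ModelRepresentationBase.default_hyper)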