Exemplo n.º 1
0
    def get_hyper_parameter(cls):
        """Build the hyper-parameter space, keeping ``ngram_range`` at 1 for words.

        A dedicated function is needed because n-grams other than 1 are not
        wanted when ``analyzer`` is "word".  The space is therefore an
        ``hp.HyperComposition`` of two ``hp.HyperCrossProduct`` branches, each
        paired with the value 0.5 (presumably its selection weight -- confirm
        against ``hp.HyperComposition``):

        * a "word" branch whose ``ngram_range`` is fixed to 1;
        * a "char" / "char_wb" branch whose ``ngram_range`` comes from
          ``hp.HyperRangeBetaInt(start=1, end=5, alpha=2, beta=1)``.

        Both branches use the same ``min_df``, ``max_df`` and ``tfidf`` values.

        Returns:
            The ``hp.HyperComposition`` combining the two branches.
        """
        # Branch 1: word analyzer, unigrams only.
        word_branch = hp.HyperCrossProduct({
            "ngram_range": 1,
            "analyzer": "word",
            "min_df": [1, 0.001, 0.01, 0.05],
            "max_df": [0.999, 0.99, 0.95],
            "tfidf": [True, False],
        })

        # Branch 2: character analyzers, variable n-gram size.
        # Original author's note on the resulting frequencies (not verified here):
        # 1 = 1.5% ; 2 = 12% ; 3 = 25% ; 4 = 37% ; 5 = 24%
        char_ngram_range = hp.HyperRangeBetaInt(start=1, end=5, alpha=2, beta=1)
        char_analyzer = hp.HyperChoice(("char", "char_wb"))
        char_branch = hp.HyperCrossProduct({
            "ngram_range": char_ngram_range,
            "analyzer": char_analyzer,
            "min_df": [1, 0.001, 0.01, 0.05],
            "max_df": [0.999, 0.99, 0.95],
            "tfidf": [True, False],
        })

        return hp.HyperComposition([
            (0.5, word_branch),
            (0.5, char_branch),
        ])
Exemplo n.º 2
0
class ModelRepresentationBase(_AbstractModelRepresentation):
    """Base class whose only role is to hold the default hyper-parameters."""

    # Default hyper-parameters used during the random search phase.
    # An entry is used if:
    # * the model has a parameter with that name
    # * that parameter is not specified within the class (within 'custom_hyper')
    default_hyper = {
        "n_components": hp.HyperRangeFloat(start=0.1, end=1, step=0.05),

        # Forest-like estimators
        "n_estimators": hp.HyperComposition(
            [
                (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
                (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
            ]
        ),
        "max_features": hp.HyperComposition(
            [
                (0.25, ["sqrt", "auto"]),
                (0.75, hp.HyperRangeBetaFloat(start=0, end=1, alpha=3, beta=1)),
            ]
        ),
        # None, then every depth from 1 to 15, then a few larger depths.
        "max_depth": hp.HyperChoice(
            [None] + list(range(1, 16)) + [20, 25, 30, 50, 100]
        ),
        "min_samples_split": hp.HyperRangeBetaInt(
            start=2, end=100, alpha=1, beta=5
        ),

        # Linear models
        "C": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha": hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),

        # CV
        "analyzer": hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],

        # Single value, so that every model with a random_state attribute
        # receives it and is therefore fixed.
        "random_state": [123],
        "drop_used_columns": [True],
        "drop_unused_columns": [True],
    }

    # Default hyper-parameters used during the default model phase.
    # An entry is used if:
    # * the model has a parameter with that name
    # * the default parameter is not specified within the class
    #   (within 'default_parameters')
    default_default_hyper = {
        "random_state": 123,
        "drop_used_columns": True,
        "drop_unused_columns": True,
    }

    depends_on = ()
Exemplo n.º 3
0
class ModelRepresentationBase(_AbstractModelRepresentation):
    """Base class whose only role is to hold the default hyper-parameters.

    ``default_hyper`` maps a parameter name to the values that may be drawn
    for it: either an ``hp.Hyper*`` object or a plain list of candidates.
    """

    default_hyper = {
        "n_components":
        hp.HyperRangeFloat(start=0.1, end=1, step=0.05),
        # Forest-like estimators
        "n_estimators":
        hp.HyperComposition([
            (0.75, hp.HyperRangeInt(start=25, end=175, step=25)),
            (0.25, hp.HyperRangeInt(start=200, end=1000, step=100)),
        ]),
        "max_features":
        hp.HyperComposition([(0.25, ["sqrt", "auto"]),
                             (0.75,
                              hp.HyperRangeBetaFloat(start=0,
                                                     end=1,
                                                     alpha=3,
                                                     beta=1))]),
        "max_depth":
        hp.HyperChoice([
            None, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20, 25,
            30, 50, 100
        ]),
        # The range starts at 2, not 1: scikit-learn tree-based estimators
        # reject an integer min_samples_split lower than 2, so drawing 1 would
        # make the model fail at fit time.
        "min_samples_split":
        hp.HyperRangeBetaInt(start=2, end=100, alpha=1, beta=5),
        # Linear models
        "C":
        hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        "alpha":
        hp.HyperLogRangeFloat(start=0.00001, end=10, n=50),
        # CV
        "analyzer":
        hp.HyperChoice(["word", "char", "char_wb"]),
        "penalty": ["l1", "l2"],
        # Single value, so that every model with a random_state attribute
        # receives it and is therefore fixed.
        "random_state": [
            123
        ],
        "columns_to_encode": ["--object--"]
    }