Example #1
    def dtr(self):
        return dict(
            cls=DecisionTreeRegressor,
            splitter=Choice(["best", "random"]),
            max_depth=Choice([None, 3, 5, 10, 20, 50]),
            random_state=randint(),
        )
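
Examples #1, #6, #8 and #10 appear to come from the same estimator catalogue: each small method returns a plain dict in which cls names the scikit-learn estimator class and every other key is a search-space primitive (Choice, Int, randint, Cascade) whose sampled value is presumably forwarded to that estimator's constructor.
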
Example #2
def default_dt_space(**hyperparams):
    space = HyperSpace()
    with space.as_default():
        p_nets = MultipleChoice([
            'dnn_nets', 'linear', 'cin_nets', 'fm_nets', 'afm_nets',
            'pnn_nets', 'cross_nets', 'cross_dnn_nets', 'dcn_nets',
            'autoint_nets', 'fgcnn_dnn_nets', 'fibi_dnn_nets'
        ],
                                num_chosen_most=3)
        dt_module = DTModuleSpace(
            nets=p_nets,
            auto_categorize=Bool(),
            cat_remain_numeric=Bool(),
            auto_discrete=Bool(),
            apply_gbm_features=Bool(),
            gbm_feature_type=Choice([
                DT_consts.GBM_FEATURE_TYPE_DENSE,
                DT_consts.GBM_FEATURE_TYPE_EMB
            ]),
            embeddings_output_dim=Choice([4, 10, 20]),
            embedding_dropout=Choice([0, 0.1, 0.2, 0.3, 0.4, 0.5]),
            stacking_op=Choice(
                [DT_consts.STACKING_OP_ADD, DT_consts.STACKING_OP_CONCAT]),
            output_use_bias=Bool(),
            apply_class_weight=Bool(),
            earlystopping_patience=Choice([1, 3, 5]))
        dnn = DnnModule()(dt_module)
        fit = DTFit(**hyperparams)(dt_module)

    return space
Example #3
def categorical_pipeline_complex(impute_strategy=None,
                                 svd_components=5,
                                 seq_no=0):
    if impute_strategy is None:
        impute_strategy = Choice(['constant', 'most_frequent'])
    elif isinstance(impute_strategy, list):
        impute_strategy = Choice(impute_strategy)
    if isinstance(svd_components, list):
        svd_components = Choice(svd_components)

    def onehot_svd():
        onehot = OneHotEncoder(name=f'categorical_onehot_{seq_no}',
                               sparse=False)
        optional_svd = Optional(TruncatedSVD(n_components=svd_components,
                                             name=f'categorical_svd_{seq_no}'),
                                name=f'categorical_optional_svd_{seq_no}',
                                keep_link=True)(onehot)
        return optional_svd

    imputer = SimpleImputer(missing_values=np.nan,
                            strategy=impute_strategy,
                            name=f'categorical_imputer_{seq_no}',
                            fill_value='')
    label_encoder = MultiLabelEncoder(
        name=f'categorical_label_encoder_{seq_no}')
    onehot = onehot_svd()
    # onehot = OneHotEncoder(name=f'categorical_onehot_{seq_no}', sparse=False)
    le_or_onehot_pca = ModuleChoice(
        [label_encoder, onehot], name=f'categorical_le_or_onehot_pca_{seq_no}')
    pipeline = Pipeline([imputer, le_or_onehot_pca],
                        name=f'categorical_pipeline_complex_{seq_no}',
                        columns=column_object_category_bool)
    return pipeline
Example #4
    def __init__(self,
                 hidden_units=None,
                 reduce_factor=None,
                 dnn_dropout=None,
                 use_bn=None,
                 dnn_layers=None,
                 activation=None,
                 space=None,
                 name=None,
                 **hyperparams):
        if hidden_units is None:
            hidden_units = Choice([100, 200, 300, 500, 800, 1000])
        hyperparams['hidden_units'] = hidden_units

        if reduce_factor is None:
            reduce_factor = Choice([1, 0.8, 0.5])
        hyperparams['reduce_factor'] = reduce_factor

        if dnn_dropout is None:
            dnn_dropout = Choice([0, 0.1, 0.3, 0.5])
        hyperparams['dnn_dropout'] = dnn_dropout

        if use_bn is None:
            use_bn = Bool()
        hyperparams['use_bn'] = use_bn

        if dnn_layers is None:
            dnn_layers = Choice([1, 2, 3])
        hyperparams['dnn_layers'] = dnn_layers

        if activation is None:
            activation = 'relu'
        hyperparams['activation'] = activation

        ModuleSpace.__init__(self, space, name, **hyperparams)
Example #5
def numeric_pipeline_complex(impute_strategy=None, seq_no=0):
    if impute_strategy is None:
        impute_strategy = Choice(
            ['mean', 'median', 'constant', 'most_frequent'])
    elif isinstance(impute_strategy, list):
        impute_strategy = Choice(impute_strategy)
    # reduce_skewness_kurtosis = SkewnessKurtosisTransformer(transform_fn=Choice([np.log, np.log10, np.log1p]))
    # reduce_skewness_kurtosis_optional = Optional(reduce_skewness_kurtosis, keep_link=True,
    #                                             name=f'numeric_reduce_skewness_kurtosis_optional_{seq_no}')

    imputer = SimpleImputer(missing_values=np.nan,
                            strategy=impute_strategy,
                            name=f'numeric_imputer_{seq_no}',
                            fill_value=0)
    scaler_options = ModuleChoice([
        StandardScaler(name=f'numeric_standard_scaler_{seq_no}'),
        MinMaxScaler(name=f'numeric_minmax_scaler_{seq_no}'),
        MaxAbsScaler(name=f'numeric_maxabs_scaler_{seq_no}'),
        RobustScaler(name=f'numeric_robust_scaler_{seq_no}')
    ],
                                  name=f'numeric_or_scaler_{seq_no}')
    scaler_optional = Optional(scaler_options,
                               keep_link=True,
                               name=f'numeric_scaler_optional_{seq_no}')

    pipeline = Pipeline([imputer, scaler_optional],
                        name=f'numeric_pipeline_complex_{seq_no}',
                        columns=column_number_exclude_timedelta)
    return pipeline
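
Examples #3 and #5 (and the module defaults in Example #4) repeat the same convention for every tunable argument: None means "search over a default Choice", a list means "search over exactly these values", and anything else is used as a fixed value. A small helper capturing that convention might look like the sketch below (the name to_search_param and the import path are assumptions, not part of the original code):

from hypernets.core.search_space import Choice  # assumed import path

def to_search_param(value, default_choices):
    # None -> search over the default choices; list -> search over the given
    # values; anything else -> keep as a fixed (non-searched) value.
    if value is None:
        return Choice(default_choices)
    if isinstance(value, list):
        return Choice(value)
    return value

# e.g.: impute_strategy = to_search_param(impute_strategy, ['mean', 'median', 'constant', 'most_frequent'])
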
Example #6
    def dt(self):
        return dict(
            cls=DecisionTreeClassifier,
            criterion=Choice(["gini", "entropy"]),
            splitter=Choice(["best", "random"]),
            max_depth=Choice([None, 3, 5, 10, 20, 50]),
            random_state=randint(),
        )
Example #7
def mini_dt_space():
    space = HyperSpace()
    with space.as_default():
        p_nets = MultipleChoice(['dnn_nets', 'linear', 'fm_nets'],
                                num_chosen_most=2)
        dt_module = DTModuleSpace(nets=p_nets,
                                  auto_categorize=Bool(),
                                  cat_remain_numeric=Bool(),
                                  auto_discrete=Bool(),
                                  apply_gbm_features=Bool(),
                                  gbm_feature_type=Choice([
                                      DT_consts.GBM_FEATURE_TYPE_DENSE,
                                      DT_consts.GBM_FEATURE_TYPE_EMB
                                  ]),
                                  embeddings_output_dim=Choice([4, 10]),
                                  embedding_dropout=Choice([0, 0.5]),
                                  stacking_op=Choice([
                                      DT_consts.STACKING_OP_ADD,
                                      DT_consts.STACKING_OP_CONCAT
                                  ]),
                                  output_use_bias=Bool(),
                                  apply_class_weight=Bool(),
                                  earlystopping_patience=Choice([1, 3, 5]))
        dnn = DnnModule(hidden_units=Choice([100, 200]),
                        reduce_factor=Choice([1, 0.8]),
                        dnn_dropout=Choice([0, 0.3]),
                        use_bn=Bool(),
                        dnn_layers=2,
                        activation='relu')(dt_module)
        fit = DTFit(batch_size=Choice([128, 256]))(dt_module)

    return space
Example #8
    def nn(self):
        solver = Choice(['lbfgs', 'sgd', 'adam'])
        return dict(
            cls=MLPClassifier,
            max_iter=Int(500, 5000, step=500),
            activation=Choice(['identity', 'logistic', 'tanh', 'relu']),
            solver=solver,
            learning_rate=Choice(['constant', 'invscaling', 'adaptive']),
            learning_rate_init_stub=Cascade(partial(self._cascade, self._nn_learning_rate_init, 'slvr'), slvr=solver),
            random_state=randint(),
        )
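
The Cascade entries here and in Example #10 express conditional hyperparameters: self._cascade receives a callback (such as _nn_learning_rate_init in Example #18 or _lr_penalty_fn in Example #19) together with the parameter it depends on, and the callback returns a (name, value_space) pair once the upstream choice (the solver or the penalty) has been sampled, so dependent parameters are only searched where they are meaningful.
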
Example #9
def get_space_num_cat_pipeline_complex(dataframe_mapper_default=False,
                                       lightgbm_fit_kwargs={},
                                       xgb_fit_kwargs={},
                                       catboost_fit_kwargs={}):
    space = HyperSpace()
    with space.as_default():
        input = HyperInput(name='input1')
        p1 = numeric_pipeline_complex()(input)
        p2 = categorical_pipeline_complex()(input)
        # p2 = categorical_pipeline_simple()(input)
        p3 = DataFrameMapper(default=dataframe_mapper_default,
                             input_df=True,
                             df_out=True,
                             df_out_dtype_transforms=[(column_object, 'int')
                                                      ])([p1, p2])

        lightgbm_init_kwargs = {
            'boosting_type': Choice(['gbdt', 'dart', 'goss']),
            'num_leaves': Choice([11, 31, 101, 301, 501]),
            'learning_rate': Real(0.001, 0.1, step=0.005),
            'n_estimators': 100,
            'max_depth': -1,
            'tree_learner': 'data'  # add for dask
            # subsample_for_bin = 200000, objective = None, class_weight = None,
            #  min_split_gain = 0., min_child_weight = 1e-3, min_child_samples = 20,
        }
        lightgbm_est = LightGBMDaskEstimator(task='binary',
                                             fit_kwargs=lightgbm_fit_kwargs,
                                             **lightgbm_init_kwargs)

        xgb_init_kwargs = {
            'tree_method': 'approx'  # add for dask
        }
        xgb_est = XGBoostDaskEstimator(task='binary',
                                       fit_kwargs=xgb_fit_kwargs,
                                       **xgb_init_kwargs)

        # catboost_init_kwargs = {
        #     'silent': True
        # }
        # catboost_est = CatBoostEstimator(task='binary', fit_kwargs=catboost_fit_kwargs, **catboost_init_kwargs)
        # or_est = ModuleChoice([lightgbm_est, xgb_est, catboost_est], name='estimator_options')(p3)

        or_est = ModuleChoice([lightgbm_est, xgb_est],
                              name='estimator_options')(p3)

        space.set_inputs(input)
    return space
Example #10
    def lr(self):
        iters = [1000]
        while iters[-1] < 9000:
            iters.append(int(round(iters[-1] * 1.25, -2)))

        solver = Choice(['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'])
        penalty = Cascade(partial(self._cascade, self._lr_penalty_fn, 'slvr'), slvr=solver)
        l1_ratio = Cascade(partial(self._cascade, self._lr_l1_ratio, 'penalty'), penalty=penalty)

        return dict(
            cls=LogisticRegression,
            max_iter=Choice(iters),
            solver=solver,
            penalty_stub=penalty,
            l1_ratio_stub=l1_ratio,
            random_state=randint(),
        )
Example #11
    def __init__(self, batch_size=128, epochs=None, space=None, name=None, **hyperparams):
        if batch_size is None:
            batch_size = Choice([128, 256, 512])
        hyperparams['batch_size'] = batch_size

        if epochs is not None:
            hyperparams['epochs'] = epochs

        ModuleSpace.__init__(self, space, name, **hyperparams)
        self.space.fit_params = self
Example #12
def conv_cell(hp_dict,
              type,
              cell_no,
              node_no,
              left_or_right,
              inputs,
              filters,
              is_reduction=False,
              data_format=None):
    assert isinstance(inputs, list)
    assert all([isinstance(m, ModuleSpace) for m in inputs])
    name_prefix = f'{type}_C{cell_no}_N{node_no}_{left_or_right}_'

    input_choice_key = f'{type[2:]}_N{node_no}_{left_or_right}_input_choice'
    op_choice_key = f'{type[2:]}_N{node_no}_{left_or_right}_op_choice'
    hp_choice = hp_dict.get(input_choice_key)
    if hp_choice is None:
        hp_choice = MultipleChoice(list(range(len(inputs))),
                                   1,
                                   name=input_choice_key)
        hp_dict[input_choice_key] = hp_choice
    ic1 = InputChoice(inputs, 1, hp_choice=hp_choice)(inputs)
    if hp_choice is None:
        hp_dict[input_choice_key] = ic1.hp_choice

    # hp_strides = Dynamic(lambda_fn=lambda choice: (2, 2) if is_reduction and choice[0] <= 1 else (1, 1),
    #                      choice=ic1.hp_choice)
    hp_strides = (1, 1)
    hp_op_choice = hp_dict.get(op_choice_key)
    module_candidates = [
        sepconv5x5(name_prefix,
                   filters,
                   strides=hp_strides,
                   data_format=data_format),
        sepconv3x3(name_prefix,
                   filters,
                   strides=hp_strides,
                   data_format=data_format),
        avgpooling3x3(name_prefix,
                      filters,
                      strides=hp_strides,
                      data_format=data_format),
        maxpooling3x3(name_prefix,
                      filters,
                      strides=hp_strides,
                      data_format=data_format),
        identity(name_prefix)
    ]
    if hp_op_choice is None:
        hp_op_choice = Choice(list(range(len(module_candidates))),
                              name=op_choice_key)
        hp_dict[op_choice_key] = hp_op_choice
    op_choice = ModuleChoice(module_candidates, hp_or=hp_op_choice)(ic1)

    return op_choice
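
Note that the keys used to cache the MultipleChoice and Choice objects in hp_dict omit the cell number, so every cell of the same type reuses the same input and operation choices at a given node position; this mirrors the usual NAS-style cell search space, in which all cells of one kind share a single architecture.
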
Example #13
def get_space():
    space = HyperSpace()
    with space.as_default():
        p1 = Int(1, 100)
        p2 = Choice(['a', 'b'])
        p3 = Bool()
        p4 = Real(0.0, 1.0)
        id1 = Identity(p1=p1)
        id2 = Identity(p2=p2)(id1)
        id3 = Identity(p3=p3)(id2)
        id4 = Identity(p4=p4)(id3)
    return space
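
A function like get_space() only defines the space; a searcher still has to assign a value to every parameter before the space can be used. A minimal by-hand sketch, assuming HyperSpace exposes random_sample() and vectors as in recent hypernets releases (verify against your version):

space = get_space()    # build the space defined in Example #13 above
space.random_sample()  # assign a random value to every hyperparameter
print(space.vectors)   # the assigned values as a searcher would encode them
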
Example #14
def cnn_search_space(input_shape, output_units, output_activation='softmax', block_num_choices=[2, 3, 4, 5, 6],
                     activation_choices=['relu'], filters_choices=[32, 64], kernel_size_choices=[(1, 1), (3, 3)]):
    space = HyperSpace()
    with space.as_default():
        hp_use_bn = Bool()
        hp_pooling = Choice(list(range(2)))
        hp_filters = Choice(filters_choices)
        hp_kernel_size = Choice(kernel_size_choices)
        hp_fc_units = Choice([1024, 2048, 4096])
        if len(activation_choices) == 1:
            hp_activation = activation_choices[0]
        else:
            hp_activation = Choice(activation_choices)
        hp_bn_act = Choice([seq for seq in itertools.permutations(range(2))])

        input = Input(shape=input_shape)
        blocks = Repeat(
            lambda step: conv_block(
                block_no=step,
                hp_pooling=hp_pooling,
                hp_filters=hp_filters,
                hp_kernel_size=hp_kernel_size,
                hp_use_bn=hp_use_bn,
                hp_activation=hp_activation,
                hp_bn_act=hp_bn_act),
            repeat_times=block_num_choices)(input)
        x = Flatten()(blocks)
        x = Dense(units=hp_fc_units, activation=hp_activation, name='fc1')(x)
        x = Dense(units=hp_fc_units, activation=hp_activation, name='fc2')(x)
        x = Dense(output_units, activation=output_activation, name='predictions')(x)
    return space
Example #15
def tiny_dt_space(**hyperparams):
    space = HyperSpace()
    with space.as_default():
        dt_module = DTModuleSpace(nets=['dnn_nets'],
                                  auto_categorize=Bool(),
                                  cat_remain_numeric=Bool(),
                                  auto_discrete=False,
                                  apply_gbm_features=False,
                                  stacking_op=Choice([
                                      DT_consts.STACKING_OP_ADD,
                                      DT_consts.STACKING_OP_CONCAT
                                  ]),
                                  output_use_bias=Bool(),
                                  apply_class_weight=Bool(),
                                  earlystopping_patience=Choice([1, 3, 5]))
        dnn = DnnModule(hidden_units=Choice([10, 20]),
                        reduce_factor=1,
                        dnn_dropout=Choice([0, 0.3]),
                        use_bn=False,
                        dnn_layers=2,
                        activation='relu')(dt_module)
        fit = DTFit(**hyperparams)(dt_module)

    return space
Example #16
    def __init__(self,
                 module_list,
                 columns=None,
                 keep_link=False,
                 space=None,
                 name=None):
        assert isinstance(module_list, list), f'module_list must be a List.'
        assert len(module_list) > 0, f'module_list contains at least 1 Module.'
        assert all([isinstance(m, (ModuleSpace, list)) for m in module_list
                    ]), 'module_list can only contains ModuleSpace or list.'
        self._module_list = module_list
        self.columns = columns
        self.hp_lazy = Choice([0])
        ConnectionSpace.__init__(self,
                                 self.pipeline_fn,
                                 keep_link,
                                 space,
                                 name,
                                 hp_lazy=self.hp_lazy)
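
The single-valued Choice([0]) stored as hp_lazy carries no real search decision; it appears to act as a placeholder hyperparameter so that the ConnectionSpace is only expanded (via pipeline_fn) once the surrounding space is being sampled.
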
Example #17
def get_space():
    space = HyperSpace()
    with space.as_default():
        id1 = Identity(p1=Int(0, 10), p2=Choice(['a', 'b']))
        id2 = Identity(p3=Real(0., 1.), p4=Bool())(id1)
    return space
Example #18
    def _nn_learning_rate_init(slvr):
        if slvr in ['sgd', 'adam']:
            return 'learning_rate_init', Choice([0.001, 0.01])
        else:
            return 'learning_rate_init', Constant(0.001)
Example #19
    def _lr_penalty_fn(slvr):
        if slvr == 'saga':
            return 'penalty', Choice(['l2', 'elasticnet', 'l1', 'none'])
        else:
            return 'penalty', Constant('l2')
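
Example #10 cascades a second time, from the sampled penalty to self._lr_l1_ratio, which is not included in this listing. Following the pattern of _lr_penalty_fn above, a sketch of it might look like this (the body is an assumption; only the name comes from Example #10):

    def _lr_l1_ratio(penalty):
        # l1_ratio only matters for the 'elasticnet' penalty; otherwise pin it
        # to a constant so the search does not branch on it.
        if penalty == 'elasticnet':
            return 'l1_ratio', Real(0.0, 1.0, step=0.1)
        else:
            return 'l1_ratio', Constant(None)
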
Example #20
def func_early_stopping(p1=Choice(['a', 'b'], random_state=np.random.RandomState(9527)),
                        p2=Int(1, 10, 2, random_state=np.random.RandomState(9527)),
                        p3=Real(1.0, 5.0, random_state=np.random.RandomState(9527)),
                        p4=9):
    print(f'p1:{p1},p2:{p2},p3:{p3},p4:{p4}')
    return 0.6