예제 #1
0
    def get_pipe(self):
        """Build the estimator pipeline ending in a grid-searched ``LinearSVR``.

        Stages, in order: degree-2 ``PolynomialFeatures``, an optional
        ``ColumnBestTransformer`` (only when ``self.bestT`` is truthy),
        ``DropConst``, a ``ShrinkBigKTransformer`` whose selector is a
        ``LassoLarsCV``, ``StandardScaler`` and finally a ``GridSearchCV``
        over the ``C`` parameter of ``LinearSVR``.

        Returns:
            The ``Pipeline`` described above. When ``self.do_prep`` is truthy
            it is nested as the ``'post'`` step of an outer ``Pipeline`` whose
            first step is a ``MissingValHandler``.
        """
        cv = self.inner_cv
        if cv is None:
            # Fallback splitter used when no inner CV was configured.
            cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=0)

        # Candidate C values: `gridpoints` log-spaced samples from 1e-2 to 1e4.
        c_grid = {'C': np.logspace(-2, 4, self.gridpoints)}

        expand = PolynomialFeatures(interaction_only=0, degree=2)
        drop_const = DropConst()
        shrink = ShrinkBigKTransformer(
            selector=LassoLarsCV(cv=cv, max_iter=64))
        scale = StandardScaler()
        # Only the selector above receives `cv`; no cv argument is passed to
        # the grid search itself.
        search = GridSearchCV(
            LinearSVR(random_state=0, tol=1e-4, max_iter=1000),
            param_grid=c_grid)

        stages = [
            ('polyfeat', expand),
            ('drop_constant', drop_const),
            ('shrink_k2', shrink),
            ('scaler', scale),
            ('reg', search),
        ]
        if self.bestT:
            # Slot the column transformer directly after the polynomial step.
            stages.insert(
                1,
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))))

        model = Pipeline(steps=stages)
        if not self.do_prep:
            return model
        return Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', model),
        ])
예제 #2
0
파일: svr.py 프로젝트: quanted/vb_django
    def get_pipe(self):
        """Build a scaled, grid-searched RBF ``SVR`` pipeline.

        The regressor is a ``GridSearchCV`` over ``C`` and ``gamma`` wrapped
        around ``SVR(kernel='rbf')``, preceded by ``StandardScaler`` and,
        when ``self.bestT`` is truthy, by a ``ColumnBestTransformer``.

        Returns:
            The ``Pipeline``; when ``self.do_prep`` is truthy it is nested as
            the ``'post'`` step behind a ``MissingValHandler`` ``'prep'`` step.
        """
        cv = self.inner_cv
        if cv is None:
            cv = RepeatedKFold(n_splits=self.cv_splits,
                               n_repeats=self.cv_repeats,
                               random_state=0)
        # NOTE(review): `cv` is resolved here but never handed to the
        # GridSearchCV below - confirm whether it was meant to be passed.

        n_points = self.gridpoints
        search_space = {
            'C': np.logspace(-2, 2, n_points),
            'gamma': np.logspace(-2, 0.5, n_points),
        }

        svr = SVR(kernel='rbf', cache_size=10000, tol=1e-4, max_iter=5000)
        stages = [
            ('scaler', StandardScaler()),
            ('reg', GridSearchCV(svr, param_grid=search_space)),
        ]
        if self.bestT:
            # The column transformer runs before scaling.
            stages = [
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))),
                *stages,
            ]

        model = Pipeline(steps=stages)
        if not self.do_prep:
            return model
        return Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', model),
        ])
예제 #3
0
    def get_pipe(self):
        """Build a grid search over a scale -> select -> ``FlexibleEstimator`` pipeline.

        The inner ``Pipeline`` is ``StandardScaler``, a
        ``ShrinkBigKTransformer(max_k=4)`` and a ``FlexibleEstimator`` built
        from ``self.flex_kwargs``; when ``self.bestT`` is truthy a
        ``ColumnBestTransformer`` is placed in front. ``GridSearchCV`` then
        searches ``select__k_share`` and, if ``self.functional_form_search``
        is truthy, ``reg__form`` as well.

        Returns:
            The ``GridSearchCV`` object; when ``self.do_prep`` is truthy it is
            nested as the ``'post'`` step of a ``Pipeline`` whose first step
            is a ``MissingValHandler``.
        """
        if self.inner_cv is None:
            inner_cv = RepeatedKFold(n_splits=self.cv_splits,
                                     n_repeats=self.cv_repeats,
                                     random_state=0)
        else:
            inner_cv = self.inner_cv
        # NOTE(review): `inner_cv` is resolved but not passed to the
        # GridSearchCV below - confirm whether `cv=inner_cv` was intended.

        steps = [('scaler', StandardScaler()),
                 ('select', ShrinkBigKTransformer(max_k=4)),
                 ('reg', FlexibleEstimator(**self.flex_kwargs))]
        if self.bestT:
            # list.insert takes (index, item): the step must be a single
            # (name, transformer) tuple, as Pipeline expects.
            steps.insert(
                0,
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))))

        pipe = Pipeline(steps=steps)
        param_grid = {
            # 2 * gridpoints evenly spaced shares between 0.2 and 1.
            'select__k_share': np.linspace(0.2, 1, self.gridpoints * 2)
        }
        if self.functional_form_search:
            param_grid['reg__form'] = ['powXB', 'expXB']

        outerpipe = GridSearchCV(pipe, param_grid=param_grid)
        if self.do_prep:
            outerpipe = Pipeline(steps=[
                ('prep', MissingValHandler(prep_dict=self.prep_dict)),
                ('post', outerpipe),
            ])

        return outerpipe
예제 #4
0
    def get_pipe(self):
        """Build a scaled ``ElasticNetCV`` pipeline.

        ``ElasticNetCV`` receives the resolved inner CV splitter,
        ``5 * gridpoints`` alphas and ``2 * gridpoints`` l1-ratio candidates.
        When ``self.bestT`` is truthy a ``ColumnBestTransformer`` is placed
        ahead of the scaler.

        Returns:
            The ``Pipeline``; when ``self.do_prep`` is truthy it is nested as
            the ``'post'`` step behind a ``MissingValHandler`` ``'prep'`` step.
        """
        cv = self.inner_cv
        if cv is None:
            cv = RepeatedKFold(n_splits=self.cv_splits,
                               n_repeats=self.cv_repeats,
                               random_state=0)

        n_points = self.gridpoints
        alpha_count = n_points * 5
        # l1 ratios computed as 1 - 10**e for e log-spaced over [-2, -0.03].
        l1_ratios = 1 - np.logspace(-2, -.03, n_points * 2)

        # NOTE(review): `normalize` was deprecated and later removed from
        # scikit-learn linear models - confirm against the pinned version.
        enet = ElasticNetCV(cv=cv,
                            normalize=False,
                            l1_ratio=l1_ratios,
                            n_alphas=alpha_count)
        stages = [('scaler', StandardScaler()), ('reg', enet)]

        if self.bestT:
            stages = [
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))),
                *stages,
            ]

        model = Pipeline(steps=stages)
        if not self.do_prep:
            return model
        return Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', model),
        ])
예제 #5
0
 def get_pipe(self):
     """Build a grid-searched ``GradientBoostingRegressor`` pipeline.

     ``self.est_kwargs`` is split by ``self.extractParams`` into a
     hyper-parameter grid and fixed regressor kwargs. If ``est_kwargs`` is
     ``None`` it is first set (on the instance) to a default grid over
     ``max_depth`` and ``n_estimators``. ``random_state`` defaults to 0
     when the fixed kwargs do not supply it.

     Returns:
         A ``Pipeline`` whose ``'reg'`` step is the ``GridSearchCV``,
         preceded by a ``ColumnBestTransformer`` when ``self.bestT`` is
         truthy; when ``self.do_prep`` is truthy it is nested as the
         ``'post'`` step behind a ``MissingValHandler`` ``'prep'`` step.
     """
     if self.inner_cv is None:
         inner_cv = RepeatedKFold(n_splits=self.cv_splits,
                                  n_repeats=self.cv_repeats,
                                  random_state=0)
     else:
         inner_cv = self.inner_cv
     if self.est_kwargs is None:
         self.est_kwargs = {'max_depth': [3, 4], 'n_estimators': [64, 128]}
     hyper_param_dict, gbr_params = self.extractParams(self.est_kwargs)
     # Pin the seed unless the caller chose one.
     gbr_params.setdefault('random_state', 0)
     steps = [('reg',
               GridSearchCV(GradientBoostingRegressor(**gbr_params),
                            param_grid=hyper_param_dict,
                            cv=inner_cv))]
     if self.bestT:
         # list.insert takes (index, item): the step must be a single
         # (name, transformer) tuple, as Pipeline expects.
         steps.insert(
             0,
             ('xtransform',
              ColumnBestTransformer(float_k=len(self.float_idx))))
     outerpipe = Pipeline(steps=steps)
     if self.do_prep:
         outerpipe = Pipeline(steps=[
             ('prep', MissingValHandler(prep_dict=self.prep_dict)),
             ('post', outerpipe),
         ])
     return outerpipe
예제 #6
0
    def get_pipe(self):
        """Build a grid search over target transforms for a linear model.

        The regressor pipeline is: optional ``ColumnBestTransformer`` (when
        ``self.bestT`` is truthy), a first ``ShrinkBigKTransformer``,
        degree-2 ``PolynomialFeatures``, ``DropConst``, a second
        ``ShrinkBigKTransformer`` and ``LinearRegression``. It is wrapped in
        a ``TransformedTargetRegressor`` and ``GridSearchCV`` tries each of
        ``None_T()``, ``Log_T()`` and ``LogP1_T()`` as the target transformer.

        Returns:
            The ``GridSearchCV`` object; when ``self.do_prep`` is truthy it is
            nested as the ``'post'`` step of a ``Pipeline`` whose first step
            is a ``MissingValHandler``.
        """
        cv = self.inner_cv
        if cv is None:
            cv = RepeatedKFold(n_splits=self.cv_splits,
                               n_repeats=self.cv_repeats,
                               random_state=0)
        # Not used below; the attribute read is kept from the original.
        gridpoints = self.gridpoints
        target_transforms = [None_T(), Log_T(), LogP1_T()]

        # First selector: shrink the original variables.
        first_shrink = ShrinkBigKTransformer(
            selector=LassoLarsCV(cv=cv, max_iter=32))
        # Degree-2 polynomial expansion of the retained variables.
        expand = PolynomialFeatures(interaction_only=0, degree=2)
        drop_const = DropConst()
        # Second selector: choose among the expanded features.
        second_shrink = ShrinkBigKTransformer(
            selector=LassoLarsCV(cv=cv, max_iter=64))

        stages = [
            ('shrink_k1', first_shrink),
            ('polyfeat', expand),
            ('drop_constant', drop_const),
            ('shrink_k2', second_shrink),
            ('reg', LinearRegression()),
        ]
        if self.bestT:
            stages = [
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))),
                *stages,
            ]

        x_pipe = Pipeline(steps=stages)
        ttr_pipe = Pipeline(
            steps=[('ttr', TransformedTargetRegressor(regressor=x_pipe))])
        search_space = {
            'ttr__transformer': target_transforms,
            'ttr__regressor__polyfeat__degree': [2],
        }
        search = GridSearchCV(ttr_pipe, param_grid=search_space, cv=cv)

        if not self.do_prep:
            return search
        return Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', search),
        ])
예제 #7
0
파일: l1lars.py 프로젝트: quanted/vb_django
    def get_pipe(self):
        """Build a ``LassoLarsCV`` pipeline.

        The ``'reg'`` step is ``LassoLarsCV`` using the resolved inner CV
        splitter and ``self.max_n_alphas``; when ``self.bestT`` is truthy a
        ``ColumnBestTransformer`` step is placed in front of it.

        Returns:
            The ``Pipeline``; when ``self.do_prep`` is truthy it is nested as
            the ``'post'`` step behind a ``MissingValHandler`` ``'prep'`` step.
        """
        if self.inner_cv is None:
            inner_cv = RepeatedKFold(n_splits=self.cv_splits,
                                     n_repeats=self.cv_repeats,
                                     random_state=0)
        else:
            inner_cv = self.inner_cv

        steps = [('reg',
                  LassoLarsCV(cv=inner_cv, max_n_alphas=self.max_n_alphas))]
        if self.bestT:
            # list.insert takes (index, item): the step must be a single
            # (name, transformer) tuple, as Pipeline expects.
            steps.insert(
                0,
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))))
        outerpipe = Pipeline(steps=steps)

        if self.do_prep:
            outerpipe = Pipeline(steps=[
                ('prep', MissingValHandler(prep_dict=self.prep_dict)),
                ('post', outerpipe),
            ])
        return outerpipe
예제 #8
0
    def get_pipe(self):
        """Build a grid-searched ``GradientBoostingRegressor`` pipeline.

        ``GridSearchCV`` (``n_jobs=1``) searches ``max_depth`` in ``[1, 2]``
        and ``n_estimators`` in ``[75, 100]`` using the resolved inner CV
        splitter. When ``self.bestT`` is truthy a ``ColumnBestTransformer``
        step is placed in front of the regressor.

        Returns:
            The ``Pipeline``; when ``self.do_prep`` is truthy it is nested as
            the ``'post'`` step behind a ``MissingValHandler`` ``'prep'`` step.
        """
        if self.inner_cv is None:
            # Fallback splitter used when no inner CV was configured.
            inner_cv = RepeatedKFold(n_splits=10, n_repeats=1, random_state=0)
        else:
            inner_cv = self.inner_cv

        param_grid = {
            'max_depth': list(range(1, 3)),
            'n_estimators': [75, 100],
        }
        steps = [('reg',
                  GridSearchCV(GradientBoostingRegressor(random_state=0),
                               param_grid=param_grid,
                               cv=inner_cv,
                               n_jobs=1))]

        if self.bestT:
            # list.insert takes (index, item): the step must be a single
            # (name, transformer) tuple, as Pipeline expects.
            steps.insert(
                0,
                ('xtransform',
                 ColumnBestTransformer(float_k=len(self.float_idx))))
        outerpipe = Pipeline(steps=steps)
        if self.do_prep:
            outerpipe = Pipeline(steps=[
                ('prep', MissingValHandler(prep_dict=self.prep_dict)),
                ('post', outerpipe),
            ])
        return outerpipe