def get_pipe(self):
    """Assemble the LinearSVR pipeline built on degree-2 polynomial features.

    Step order: ``polyfeat`` -> (``xtransform`` when ``self.bestT``) ->
    ``drop_constant`` -> ``shrink_k2`` -> ``scaler`` -> ``reg``.  The ``reg``
    step is a ``GridSearchCV`` over ``C`` on a log-spaced grid with
    ``self.gridpoints`` points.

    Returns:
        The assembled ``Pipeline``; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step behind a ``MissingValHandler``
        ``'prep'`` step.
    """
    # Fall back to a fixed 10-split, 3-repeat splitter when none was supplied.
    cv_splitter = self.inner_cv
    if cv_splitter is None:
        cv_splitter = RepeatedKFold(n_splits=10, n_repeats=3, random_state=0)

    c_grid = {'C': np.logspace(-2, 4, self.gridpoints)}

    poly_step = ('polyfeat', PolynomialFeatures(interaction_only=0, degree=2))
    later_steps = [
        ('drop_constant', DropConst()),
        ('shrink_k2', ShrinkBigKTransformer(
            selector=LassoLarsCV(cv=cv_splitter, max_iter=64))),
        ('scaler', StandardScaler()),
        # NOTE(review): this grid search receives no cv argument, so the
        # splitter above is only used by the LassoLarsCV selector.
        ('reg', GridSearchCV(
            LinearSVR(random_state=0, tol=1e-4, max_iter=1000),
            param_grid=c_grid)),
    ]
    if self.bestT:
        # The column transformer goes right after the polynomial expansion.
        later_steps.insert(
            0,
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))

    model = Pipeline(steps=[poly_step, *later_steps])
    if not self.do_prep:
        return model
    return Pipeline(steps=[
        ('prep', MissingValHandler(prep_dict=self.prep_dict)),
        ('post', model),
    ])
def get_pipe(self):
    """Assemble the RBF-kernel SVR pipeline.

    Step order: (``xtransform`` when ``self.bestT``) -> ``scaler`` ->
    ``reg``.  The ``reg`` step is a ``GridSearchCV`` over ``C`` and
    ``gamma``, each on a log-spaced grid of ``self.gridpoints`` points.

    Returns:
        The assembled ``Pipeline``; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step behind a ``MissingValHandler``
        ``'prep'`` step.
    """
    # NOTE(review): the splitter is resolved here but never handed to the
    # GridSearchCV below -- confirm whether cv=... was intended.
    if self.inner_cv is not None:
        cv_splitter = self.inner_cv
    else:
        cv_splitter = RepeatedKFold(
            n_splits=self.cv_splits, n_repeats=self.cv_repeats, random_state=0)

    n_points = self.gridpoints
    search_space = {
        'C': np.logspace(-2, 2, n_points),
        'gamma': np.logspace(-2, 0.5, n_points),
    }
    svr_search = GridSearchCV(
        SVR(kernel='rbf', cache_size=10000, tol=1e-4, max_iter=5000),
        param_grid=search_space)

    stages = []
    if self.bestT:
        stages.append(
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))
    stages.append(('scaler', StandardScaler()))
    stages.append(('reg', svr_search))

    model = Pipeline(steps=stages)
    if not self.do_prep:
        return model
    return Pipeline(steps=[
        ('prep', MissingValHandler(prep_dict=self.prep_dict)),
        ('post', model),
    ])
def get_pipe(self):
    """Build a grid search over a scaler -> selector -> FlexibleEstimator pipe.

    The inner pipeline is ``scaler`` -> ``select`` -> ``reg`` (preceded by
    ``xtransform`` when ``self.bestT`` is truthy).  The grid search tunes
    ``select__k_share`` on ``2 * self.gridpoints`` evenly spaced values in
    [0.2, 1] and, when ``self.functional_form_search`` is truthy, also
    ``reg__form`` over ``'powXB'`` and ``'expXB'``.

    Returns:
        The ``GridSearchCV`` object; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step of a ``Pipeline`` behind a
        ``MissingValHandler`` ``'prep'`` step.
    """
    # NOTE(review): inner_cv is resolved but not passed to GridSearchCV
    # below -- confirm whether cv=inner_cv was intended.
    if self.inner_cv is None:
        inner_cv = RepeatedKFold(
            n_splits=self.cv_splits, n_repeats=self.cv_repeats, random_state=0)
    else:
        inner_cv = self.inner_cv

    steps = [
        ('scaler', StandardScaler()),
        ('select', ShrinkBigKTransformer(max_k=4)),
        ('reg', FlexibleEstimator(**self.flex_kwargs)),
    ]
    if self.bestT:
        # list.insert takes (index, item): the step must be one
        # (name, estimator) tuple, not two separate arguments.
        steps.insert(
            0,
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))
    pipe = Pipeline(steps=steps)

    param_grid = {
        'select__k_share': np.linspace(0.2, 1, self.gridpoints * 2),
    }
    if self.functional_form_search:
        # 'linear' is deliberately left out of the searched forms for now.
        param_grid['reg__form'] = ['powXB', 'expXB']

    outerpipe = GridSearchCV(pipe, param_grid=param_grid)
    if self.do_prep:
        outerpipe = Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', outerpipe),
        ])
    return outerpipe
def get_pipe(self):
    """Build the scaler -> ElasticNetCV pipeline.

    ``ElasticNetCV`` is given ``5 * self.gridpoints`` alphas and
    ``2 * self.gridpoints`` l1-ratio candidates computed as
    ``1 - logspace(-2, -0.03)``.  When ``self.bestT`` is truthy an
    ``xtransform`` step is placed first.

    Returns:
        The assembled ``Pipeline``; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step behind a ``MissingValHandler``
        ``'prep'`` step.
    """
    if self.inner_cv is None:
        inner_cv = RepeatedKFold(
            n_splits=self.cv_splits, n_repeats=self.cv_repeats, random_state=0)
    else:
        inner_cv = self.inner_cv

    gridpoints = self.gridpoints
    n_alphas = gridpoints * 5
    l1_ratio = 1 - np.logspace(-2, -.03, gridpoints * 2)

    steps = [
        ('scaler', StandardScaler()),
        # `normalize` is intentionally not passed: it was deprecated in
        # scikit-learn 1.0 and removed in 1.2 (passing it raises TypeError).
        # Its default for ElasticNetCV was already False, and the data is
        # standardised by the preceding step.
        ('reg', ElasticNetCV(cv=inner_cv, l1_ratio=l1_ratio,
                             n_alphas=n_alphas)),
    ]
    if self.bestT:
        steps.insert(
            0,
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))

    outerpipe = Pipeline(steps=steps)
    if self.do_prep:
        outerpipe = Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', outerpipe),
        ])
    return outerpipe
def get_pipe(self):
    """Build a grid-searched GradientBoostingRegressor pipeline.

    ``self.est_kwargs`` (defaulting to a small ``max_depth`` /
    ``n_estimators`` grid when ``None``) is split by ``self.extractParams``
    into the grid passed to ``GridSearchCV`` and the keyword arguments
    passed to the regressor's constructor.  ``random_state`` defaults to 0
    when the caller did not set it.  When ``self.bestT`` is truthy an
    ``xtransform`` step is placed first.

    Side effect: assigns the default grid to ``self.est_kwargs`` when it
    is ``None``.

    Returns:
        The assembled ``Pipeline``; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step behind a ``MissingValHandler``
        ``'prep'`` step.
    """
    if self.inner_cv is None:
        inner_cv = RepeatedKFold(
            n_splits=self.cv_splits, n_repeats=self.cv_repeats, random_state=0)
    else:
        inner_cv = self.inner_cv

    if self.est_kwargs is None:
        self.est_kwargs = {'max_depth': [3, 4], 'n_estimators': [64, 128]}
    hyper_param_dict, gbr_params = self.extractParams(self.est_kwargs)
    if 'random_state' not in gbr_params:
        gbr_params['random_state'] = 0

    steps = [
        ('reg', GridSearchCV(GradientBoostingRegressor(**gbr_params),
                             param_grid=hyper_param_dict, cv=inner_cv)),
    ]
    if self.bestT:
        # list.insert takes (index, item): the step must be one
        # (name, estimator) tuple, not two separate arguments.
        steps.insert(
            0,
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))

    outerpipe = Pipeline(steps=steps)
    if self.do_prep:
        outerpipe = Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', outerpipe),
        ])
    return outerpipe
def get_pipe(self):
    """Assemble the selected-polynomial linear regression with target search.

    Feature pipeline: (``xtransform`` when ``self.bestT``) -> ``shrink_k1``
    -> ``polyfeat`` -> ``drop_constant`` -> ``shrink_k2`` -> ``reg``.  It is
    wrapped in a ``TransformedTargetRegressor`` (step ``ttr``), and a
    ``GridSearchCV`` picks the target transformer from ``None_T``,
    ``Log_T`` and ``LogP1_T`` with the polynomial degree fixed at 2.

    Returns:
        The ``GridSearchCV`` object; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step of a ``Pipeline`` behind a
        ``MissingValHandler`` ``'prep'`` step.
    """
    cv_splitter = self.inner_cv
    if cv_splitter is None:
        cv_splitter = RepeatedKFold(
            n_splits=self.cv_splits, n_repeats=self.cv_repeats, random_state=0)

    gridpoints = self.gridpoints  # read as in the original; not used below

    feature_steps = []
    if self.bestT:
        feature_steps.append(
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))
    feature_steps += [
        # First pass: keep a subset of the best original variables.
        ('shrink_k1', ShrinkBigKTransformer(
            selector=LassoLarsCV(cv=cv_splitter, max_iter=32))),
        # Expand the survivors into degree-2 polynomial terms.
        ('polyfeat', PolynomialFeatures(interaction_only=0, degree=2)),
        ('drop_constant', DropConst()),
        # Second pass: choose among all of the expanded terms.
        ('shrink_k2', ShrinkBigKTransformer(
            selector=LassoLarsCV(cv=cv_splitter, max_iter=64))),
        ('reg', LinearRegression()),
    ]

    feature_pipe = Pipeline(steps=feature_steps)
    target_pipe = Pipeline(
        steps=[('ttr', TransformedTargetRegressor(regressor=feature_pipe))])

    search_space = {
        'ttr__transformer': [None_T(), Log_T(), LogP1_T()],
        'ttr__regressor__polyfeat__degree': [2],
    }
    model = GridSearchCV(target_pipe, param_grid=search_space, cv=cv_splitter)

    if not self.do_prep:
        return model
    return Pipeline(steps=[
        ('prep', MissingValHandler(prep_dict=self.prep_dict)),
        ('post', model),
    ])
def get_pipe(self):
    """Build the LassoLarsCV pipeline.

    The single ``reg`` step is a ``LassoLarsCV`` using the resolved
    cross-validation splitter and ``self.max_n_alphas``.  When
    ``self.bestT`` is truthy an ``xtransform`` step is placed first.

    Returns:
        The assembled ``Pipeline``; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step behind a ``MissingValHandler``
        ``'prep'`` step.
    """
    if self.inner_cv is None:
        inner_cv = RepeatedKFold(
            n_splits=self.cv_splits, n_repeats=self.cv_repeats, random_state=0)
    else:
        inner_cv = self.inner_cv

    steps = [
        ('reg', LassoLarsCV(cv=inner_cv, max_n_alphas=self.max_n_alphas)),
    ]
    if self.bestT:
        # list.insert takes (index, item): the step must be one
        # (name, estimator) tuple, not two separate arguments.
        steps.insert(
            0,
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))

    outerpipe = Pipeline(steps=steps)
    if self.do_prep:
        outerpipe = Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', outerpipe),
        ])
    return outerpipe
def get_pipe(self):
    """Build a GradientBoostingRegressor pipeline with a fixed small grid.

    The ``reg`` step is a single-job ``GridSearchCV`` over ``max_depth``
    in {1, 2} and ``n_estimators`` in {75, 100}.  When ``self.inner_cv`` is
    ``None`` a 10-split, single-repeat ``RepeatedKFold`` is used.  When
    ``self.bestT`` is truthy an ``xtransform`` step is placed first.

    Returns:
        The assembled ``Pipeline``; when ``self.do_prep`` is truthy it is
        wrapped as the ``'post'`` step behind a ``MissingValHandler``
        ``'prep'`` step.
    """
    if self.inner_cv is None:
        inner_cv = RepeatedKFold(n_splits=10, n_repeats=1, random_state=0)
    else:
        inner_cv = self.inner_cv

    param_grid = {
        'max_depth': list(range(1, 3)),
        'n_estimators': [75, 100],
    }
    steps = [
        ('reg', GridSearchCV(GradientBoostingRegressor(random_state=0),
                             param_grid=param_grid, cv=inner_cv, n_jobs=1)),
    ]
    if self.bestT:
        # list.insert takes (index, item): the step must be one
        # (name, estimator) tuple, not two separate arguments.
        steps.insert(
            0,
            ('xtransform', ColumnBestTransformer(float_k=len(self.float_idx))))

    outerpipe = Pipeline(steps=steps)
    if self.do_prep:
        outerpipe = Pipeline(steps=[
            ('prep', MissingValHandler(prep_dict=self.prep_dict)),
            ('post', outerpipe),
        ])
    return outerpipe