def _safe_indexing(
    X: Union[OneDimArrayLikeType, TwoDimArrayLikeType], indices: OneDimArrayLikeType
) -> Union[OneDimArrayLikeType, TwoDimArrayLikeType]:
    """Select rows of ``X`` given by ``indices``, letting ``None`` pass through.

    Thin None-tolerant wrapper around scikit-learn's ``safe_indexing``; useful
    for optional inputs such as ``y`` or ``groups``.
    """
    # None means "no data" (e.g. unsupervised y) — return it untouched.
    return X if X is None else sklearn_safe_indexing(X, indices)
def _safe_indexing( X, # type: Union[OneDimArrayLikeType, TwoDimArrayLikeType] indices, # type: OneDimArrayLikeType ): # type: (...) -> Union[OneDimArrayLikeType, TwoDimArrayLikeType] if X is None: return X return sklearn_safe_indexing(X, indices)
def _check_fit_params( X, # type: TwoDimArrayLikeType fit_params, # type: Dict indices, # type: OneDimArrayLikeType ): # type: (...) -> Dict fit_params_validated = {} for key, value in fit_params.items(): # NOTE Original implementation: # https://github.com/scikit-learn/scikit-learn/blob/ \ # 2467e1b84aeb493a22533fa15ff92e0d7c05ed1c/sklearn/utils/validation.py#L1324-L1328 # Scikit-learn does not accept non-iterable inputs. # This line is for keeping backward compatibility. # (See: https://github.com/scikit-learn/scikit-learn/issues/15805) if not _is_arraylike(value) or _num_samples(value) != _num_samples(X): fit_params_validated[key] = value else: fit_params_validated[key] = _make_indexable(value) fit_params_validated[key] = sklearn_safe_indexing(fit_params_validated[key], indices) return fit_params_validated
def fit(
    self,
    X,  # type: TwoDimArrayLikeType
    y=None,  # type: Optional[Union[OneDimArrayLikeType, TwoDimArrayLikeType]]
    groups=None,  # type: Optional[OneDimArrayLikeType]
    **fit_params  # type: Any
):
    # type: (...) -> 'OptunaSearchCV'
    """Run fit with all sets of parameters.

    Args:
        X:
            Training data.

        y:
            Target variable.

        groups:
            Group labels for the samples used while splitting the dataset
            into train/test set.

        **fit_params:
            Parameters passed to ``fit`` on the estimator.

    Returns:
        self:
            Return self.
    """

    self._check_params()

    random_state = check_random_state(self.random_state)
    max_samples = self.subsample
    n_samples = _num_samples(X)
    # Remember the caller's log level so it can be restored after the search.
    old_level = _logger.getEffectiveLevel()

    if self.verbose > 1:
        _logger.setLevel(DEBUG)
    elif self.verbose > 0:
        _logger.setLevel(INFO)
    else:
        _logger.setLevel(WARNING)

    self.sample_indices_ = np.arange(n_samples)

    # A float ``subsample`` is interpreted as a fraction of the dataset.
    if isinstance(max_samples, float):
        max_samples = int(max_samples * n_samples)

    if max_samples < n_samples:
        # Draw a random subset without replacement; sort to keep row order.
        self.sample_indices_ = random_state.choice(
            self.sample_indices_, max_samples, replace=False
        )

        self.sample_indices_.sort()

    X_res = sklearn_safe_indexing(X, self.sample_indices_)
    y_res = sklearn_safe_indexing(y, self.sample_indices_)
    groups_res = sklearn_safe_indexing(groups, self.sample_indices_)
    # ``fit_params`` is always a dict (possibly empty) because it comes from
    # ``**fit_params``, so no None-check is needed before validation.
    fit_params_res = _check_fit_params(X, fit_params, self.sample_indices_)

    classifier = is_classifier(self.estimator)
    cv = check_cv(self.cv, y_res, classifier)

    self.n_splits_ = cv.get_n_splits(X_res, y_res, groups=groups_res)
    self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)

    if self.study is None:
        # Derive the sampler seed from ``random_state`` for reproducibility.
        seed = random_state.randint(0, np.iinfo("int32").max)
        sampler = samplers.TPESampler(seed=seed)

        self.study_ = study_module.create_study(direction="maximize", sampler=sampler)

    else:
        self.study_ = self.study

    objective = _Objective(
        self.estimator,
        self.param_distributions,
        X_res,
        y_res,
        cv,
        self.enable_pruning,
        self.error_score,
        fit_params_res,
        groups_res,
        self.max_iter,
        self.return_train_score,
        self.scorer_,
    )

    _logger.info(
        "Searching the best hyperparameters using {} "
        "samples...".format(_num_samples(self.sample_indices_))
    )

    self.study_.optimize(
        objective, n_jobs=self.n_jobs, n_trials=self.n_trials, timeout=self.timeout
    )

    # Fixed typo in the log message ("hyperparemeter").
    _logger.info("Finished hyperparameter search!")

    if self.refit:
        # Refit on the full (non-subsampled) data with the best parameters.
        self._refit(X, y, **fit_params)

    _logger.setLevel(old_level)

    return self
def safe_indexing(X, indices):
    """Select rows of ``X`` given by ``indices``, letting ``None`` pass through.

    Public None-tolerant counterpart of scikit-learn's ``safe_indexing``.
    """
    # ``None`` (e.g. an absent ``y``) is forwarded untouched.
    return X if X is None else sklearn_safe_indexing(X, indices)