class BaggedDecisionTreeClassifier():
    """Bagging ensemble whose base learner is a single decision tree.

    The constructor builds a ``DecisionTreeClassifier`` (kept in
    ``self.tree``) and hands it to a ``BaggingClassifier`` (kept in
    ``self.BagDT``). Every public method is a thin pass-through to
    ``self.BagDT``.
    """

    def __init__(self, n_estimators=20, bootstrap=True, bootstrap_features=False, oob_score=False, max_depth=None, min_samples_leaf=20, warm_start=False, n_jobs=None, early_stopping='auto', verbose=0, random_state=None):
        """Build the base tree and the bagging ensemble around it.

        ``max_depth`` and ``min_samples_leaf`` configure the tree; the
        remaining arguments are forwarded to ``BaggingClassifier``.

        NOTE: ``early_stopping`` is accepted but is not used anywhere in
        this class.
        """
        tree_options = {
            'max_depth': max_depth,
            'min_samples_leaf': min_samples_leaf,
        }
        self.tree = DecisionTreeClassifier(**tree_options)

        ensemble_options = {
            'n_estimators': n_estimators,
            'bootstrap': bootstrap,
            'bootstrap_features': bootstrap_features,
            'oob_score': oob_score,
            'warm_start': warm_start,
            'n_jobs': n_jobs,
            'random_state': random_state,
            'verbose': verbose,
        }
        self.BagDT = BaggingClassifier(base_estimator=self.tree, **ensemble_options)

    def fit(self, X, y, sample_weight=None):
        """Fit the ensemble on ``X``/``y``.

        Returns:
            The underlying ``self.BagDT`` object (not this wrapper).
        """
        ensemble = self.BagDT
        ensemble.fit(X, y, sample_weight=sample_weight)
        return ensemble

    def predict(self, X):
        """Return ``self.BagDT.predict(X)``."""
        labels = self.BagDT.predict(X)
        return labels

    def predict_proba(self, X):
        """Return ``self.BagDT.predict_proba(X)``."""
        probabilities = self.BagDT.predict_proba(X)
        return probabilities

    def predict_log_proba(self, X):
        """Return ``self.BagDT.predict_log_proba(X)``."""
        log_probabilities = self.BagDT.predict_log_proba(X)
        return log_probabilities

    def decision_function(self, X):
        """Return ``self.BagDT.decision_function(X)``."""
        decision_values = self.BagDT.decision_function(X)
        return decision_values

    def score(self, X, y, sample_weight=None):
        """Return ``self.BagDT.score(X, y, sample_weight=sample_weight)``."""
        result = self.BagDT.score(X, y, sample_weight=sample_weight)
        return result

    def get_params(self, deep=True):
        """Return the parameters reported by ``self.BagDT``.

        These are the ensemble's parameters, not this wrapper's
        constructor arguments.
        """
        ensemble_params = self.BagDT.get_params(deep=deep)
        return ensemble_params

    def set_params(self, **params):
        """Forward ``params`` to ``self.BagDT.set_params`` and return its result."""
        outcome = self.BagDT.set_params(**params)
        return outcome
class _BaggingClassifierImpl:
    """Implementation object that delegates a bagging classifier to ``SKLModel``.

    The constructor arguments are kept in ``self._hyperparams`` and passed
    verbatim to ``SKLModel``; the resulting object lives in
    ``self._wrapped_model`` and receives every prediction/scoring call.
    """

    def __init__(
        self,
        base_estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        """Record the hyperparameters and build the wrapped model from them."""
        self._hyperparams = {
            "base_estimator": base_estimator,
            "n_estimators": n_estimators,
            "max_samples": max_samples,
            "max_features": max_features,
            "bootstrap": bootstrap,
            "bootstrap_features": bootstrap_features,
            "oob_score": oob_score,
            "warm_start": warm_start,
            "n_jobs": n_jobs,
            "random_state": random_state,
            "verbose": verbose,
        }
        self._wrapped_model = SKLModel(**self._hyperparams)

    def get_params(self, deep=True):
        """Return the wrapped model's parameters.

        The ``"base_estimator"`` entry is replaced by the value stored in
        ``self._hyperparams`` so that callers get back the operator they
        supplied rather than whatever the wrapped model holds.
        """
        out = self._wrapped_model.get_params(deep=deep)
        # we want to return the lale operator, not the underlying impl
        out["base_estimator"] = self._hyperparams["base_estimator"]
        return out

    def fit(self, X, y, sample_weight=None):
        """Fit the wrapped model and return ``self``.

        When ``X`` is a ``pandas.DataFrame`` and a base estimator was
        supplied, the wrapped model is rebuilt for this fit with the base
        estimator prefixed by a transformer that re-wraps incoming data in
        a DataFrame carrying ``X``'s column labels.

        The pipeline is built into a per-fit copy of the hyperparameters.
        ``self._hyperparams`` is never modified, so repeated ``fit`` calls
        do not stack one transformer on top of another and ``get_params``
        keeps reporting the estimator the caller passed in.
        """
        base_estimator = self._hyperparams["base_estimator"]
        # NOTE(review): a ``None`` base estimator is left alone, on the
        # assumption that ``>>`` cannot usefully compose with ``None`` --
        # confirm against the operator library.
        if isinstance(X, pd.DataFrame) and base_estimator is not None:
            # Capture only the column labels so the closure does not keep
            # the whole training frame alive inside the fitted model.
            columns = X.columns
            feature_transformer = FunctionTransformer(
                func=lambda X_prime: pd.DataFrame(X_prime, columns=columns),
                inverse_func=None,
                check_inverse=False,
            )
            # NOTE(review): if the ensemble hands a column subset to its
            # members, ``columns`` may not match ``X_prime`` -- confirm.
            fit_hyperparams = dict(self._hyperparams)
            fit_hyperparams["base_estimator"] = feature_transformer >> base_estimator
            self._wrapped_model = SKLModel(**fit_hyperparams)
        self._wrapped_model.fit(X, y, sample_weight)
        return self

    def predict(self, X, **predict_params):
        """Return ``self._wrapped_model.predict(X, **predict_params)``."""
        return self._wrapped_model.predict(X, **predict_params)

    def predict_proba(self, X):
        """Return ``self._wrapped_model.predict_proba(X)``."""
        return self._wrapped_model.predict_proba(X)

    def predict_log_proba(self, X):
        """Return ``self._wrapped_model.predict_log_proba(X)``."""
        return self._wrapped_model.predict_log_proba(X)

    def decision_function(self, X):
        """Return ``self._wrapped_model.decision_function(X)``."""
        return self._wrapped_model.decision_function(X)

    def score(self, X, y, sample_weight=None):
        """Return ``self._wrapped_model.score(X, y, sample_weight)``."""
        return self._wrapped_model.score(X, y, sample_weight)
class HistRandomForestClassifier():
    """Bagging ensemble of single-iteration histogram gradient-boosting models.

    The base learner (``self.tree``) is a
    ``HistGradientBoostingClassifier`` built with ``learning_rate=1`` and
    ``max_iter=1``. It is wrapped in a ``BaggingClassifier`` stored in
    ``self.HistRF``, and every public method forwards to ``self.HistRF``.
    All constructor arguments are also kept as same-named attributes.
    """

    def __init__(self, loss='auto', max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0, max_bins=255, n_estimators=20, max_samples=1.0, bootstrap=True, bootstrap_features=False, oob_score=False, categorical_features=None, monotonic_cst=None, warm_start=False, n_jobs=None, early_stopping='auto', scoring='loss', validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None):
        """Store the hyperparameters and build the base learner and ensemble.

        Tree-related arguments go to ``HistGradientBoostingClassifier``;
        ``n_estimators``, ``max_samples``, ``bootstrap``,
        ``bootstrap_features``, ``oob_score``, ``warm_start`` and
        ``n_jobs`` go to ``BaggingClassifier``. ``verbose`` and
        ``random_state`` are passed to both.
        """
        self.loss = loss
        self.max_leaf_nodes = max_leaf_nodes
        self.max_depth = max_depth
        self.min_samples_leaf = min_samples_leaf
        self.l2_regularization = l2_regularization
        self.max_bins = max_bins
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.oob_score = oob_score
        self.categorical_features = categorical_features
        self.monotonic_cst = monotonic_cst
        self.warm_start = warm_start
        self.n_jobs = n_jobs
        self.early_stopping = early_stopping
        self.scoring = scoring
        self.validation_fraction = validation_fraction
        self.n_iter_no_change = n_iter_no_change
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state

        self.tree = HistGradientBoostingClassifier(
            loss=loss,
            learning_rate=1,
            max_iter=1,
            max_leaf_nodes=max_leaf_nodes,
            max_depth=max_depth,
            min_samples_leaf=min_samples_leaf,
            l2_regularization=l2_regularization,
            max_bins=max_bins,
            categorical_features=categorical_features,
            monotonic_cst=monotonic_cst,
            early_stopping=early_stopping,
            scoring=scoring,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            tol=tol,
            verbose=verbose,
            random_state=random_state,
        )
        self.HistRF = BaggingClassifier(
            base_estimator=self.tree,
            n_estimators=n_estimators,
            # Forward max_samples so a caller-supplied value actually
            # takes effect; previously it was stored but never passed on.
            max_samples=max_samples,
            bootstrap=bootstrap,
            bootstrap_features=bootstrap_features,
            oob_score=oob_score,
            warm_start=warm_start,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
        )

    def decision_function(self, X):
        """Return ``self.HistRF.decision_function(X)``."""
        return self.HistRF.decision_function(X)

    def fit(self, X, y, sample_weight=None):
        """Fit the ensemble on ``X``/``y``.

        Returns:
            The underlying ``self.HistRF`` object (not this wrapper).
        """
        self.HistRF.fit(X, y, sample_weight=sample_weight)
        return self.HistRF

    def get_params(self, deep=True):
        """Return the parameters reported by ``self.HistRF``.

        These are the ensemble's parameters, not this wrapper's
        constructor arguments.
        """
        return self.HistRF.get_params(deep=deep)

    def predict(self, X):
        """Return ``self.HistRF.predict(X)``."""
        return self.HistRF.predict(X)

    def predict_log_proba(self, X):
        """Return ``self.HistRF.predict_log_proba(X)``."""
        return self.HistRF.predict_log_proba(X)

    def predict_proba(self, X):
        """Return ``self.HistRF.predict_proba(X)``."""
        return self.HistRF.predict_proba(X)

    def score(self, X, y, sample_weight=None):
        """Return ``self.HistRF.score(X, y, sample_weight=sample_weight)``."""
        return self.HistRF.score(X, y, sample_weight=sample_weight)

    def set_params(self, **params):
        """Forward ``params`` to ``self.HistRF.set_params`` and return its result."""
        return self.HistRF.set_params(**params)
class Bagging(Classifier):
    r"""Implementation of bagging classifier.

    Date:
        2020

    Author:
        Luka Pečnik

    License:
        MIT

    Reference:
        L. Breiman, “Bagging predictors”, Machine Learning, 24(2), 123-140, 1996.

    Documentation:
        https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html

    See Also:
        * :class:`niaaml.classifiers.Classifier`
    """
    Name = 'Bagging'

    def __init__(self, **kwargs):
        r"""Initialize Bagging instance.

        Installs ``ignore`` filters for a fixed set of warning categories,
        declares the tunable parameters in ``self._params`` and creates a
        default ``BaggingClassifier``. ``kwargs`` is accepted but not read.
        """
        silenced_categories = (
            ChangedBehaviorWarning,
            ConvergenceWarning,
            DataConversionWarning,
            DataDimensionalityWarning,
            EfficiencyWarning,
            FitFailedWarning,
            NonBLASDotWarning,
            UndefinedMetricWarning,
        )
        for warning_category in silenced_categories:
            warnings.filterwarnings(action='ignore', category=warning_category)

        on_off = [True, False]
        self._params = {
            'n_estimators': ParameterDefinition(MinMax(min=10, max=111), np.uint),
            'bootstrap': ParameterDefinition(on_off),
            'bootstrap_features': ParameterDefinition(list(on_off)),
        }
        self.__bagging_classifier = BaggingClassifier()

    def set_parameters(self, **kwargs):
        r"""Set the parameters/arguments of the algorithm.

        All keyword arguments are forwarded to the wrapped classifier's
        ``set_params``.
        """
        model = self.__bagging_classifier
        model.set_params(**kwargs)

    def fit(self, x, y, **kwargs):
        r"""Fit Bagging.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.
            y (pandas.core.series.Series): n classes of the samples in the x array.

        Returns:
            None
        """
        model = self.__bagging_classifier
        model.fit(x, y)

    def predict(self, x, **kwargs):
        r"""Predict class for each sample (row) in x.

        Arguments:
            x (pandas.core.frame.DataFrame): n samples to classify.

        Returns:
            pandas.core.series.Series: n predicted classes.
        """
        predictions = self.__bagging_classifier.predict(x)
        return predictions

    def to_string(self):
        r"""User friendly representation of the object.

        Returns:
            str: User friendly representation of the object.
        """
        template = Classifier.to_string(self)
        current_params = self.__bagging_classifier.get_params()
        rendered_args = self._parameters_to_string(current_params)
        return template.format(name=self.Name, args=rendered_args)