def fit(self, X, y, sample_weight=None):
    from sklearn.tree import DecisionTreeClassifier

    self.max_features = float(self.max_features)

    # Heuristic to set the tree depth
    if check_none(self.max_depth):
        max_depth = self.max_depth = None
    else:
        num_features = X.shape[1]
        self.max_depth = int(self.max_depth)
        max_depth = max(1, int(np.round(self.max_depth * num_features, 0)))

    self.min_samples_split = int(self.min_samples_split)
    self.min_samples_leaf = int(self.min_samples_leaf)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
    self.min_impurity_decrease = float(self.min_impurity_decrease)

    self.estimator = DecisionTreeClassifier(
        criterion=self.criterion,
        max_depth=max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        max_leaf_nodes=self.max_leaf_nodes,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        min_impurity_decrease=self.min_impurity_decrease,
        class_weight=self.class_weight,
        random_state=self.random_state)
    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def __init__(self, n_estimators, criterion, min_samples_leaf,
             min_samples_split, max_features, bootstrap, max_leaf_nodes,
             max_depth, min_weight_fraction_leaf, min_impurity_decrease,
             oob_score=False, n_jobs=1, random_state=None, verbose=0,
             class_weight=None):
    self.n_estimators = int(n_estimators)
    self.estimator_increment = 10
    if criterion not in ("gini", "entropy"):
        raise ValueError("'criterion' is not in ('gini', 'entropy'): "
                         "%s" % criterion)
    self.criterion = criterion

    if check_none(max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(max_depth)
    if check_none(max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(max_leaf_nodes)

    self.min_samples_leaf = int(min_samples_leaf)
    self.min_samples_split = int(min_samples_split)
    self.max_features = float(max_features)
    self.bootstrap = check_for_bool(bootstrap)
    self.min_weight_fraction_leaf = float(min_weight_fraction_leaf)
    self.min_impurity_decrease = float(min_impurity_decrease)
    self.oob_score = oob_score
    self.n_jobs = int(n_jobs)
    self.random_state = random_state
    self.verbose = int(verbose)
    self.class_weight = class_weight
    self.estimator = None
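# The components in this section rely on two small helpers, check_none and
# check_for_bool, imported from auto-sklearn's utility module. A minimal
# sketch of their apparent contract, assuming hyperparameters arrive from the
# configuration space either as native values or as strings; this is an
# illustration, not the library's exact implementation:
def check_none(p):
    """True if p encodes None, either the literal or its string form."""
    return p in ("None", "none", None)


def check_for_bool(p):
    """Coerce a bool, or its string form, to a real bool."""
    if p in ("False", "false", False):
        return False
    elif p in ("True", "true", True):
        return True
    else:
        raise ValueError("%s is not a bool" % str(p))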
def _fit(self, X, Y=None):
    import sklearn.ensemble

    self.n_estimators = int(self.n_estimators)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)
    self.min_samples_split = int(self.min_samples_split)
    self.min_samples_leaf = int(self.min_samples_leaf)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
    self.bootstrap = check_for_bool(self.bootstrap)

    self.preprocessor = sklearn.ensemble.RandomTreesEmbedding(
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        max_leaf_nodes=self.max_leaf_nodes,
        sparse_output=self.sparse_output,
        n_jobs=self.n_jobs,
        random_state=self.random_state,
    )
    self.preprocessor.fit(X, Y)
    return self
def fit(self, X, Y):
    import sklearn.ensemble
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

    self.learning_rate = float(self.learning_rate)
    self.max_iter = int(self.max_iter)
    self.min_samples_leaf = int(self.min_samples_leaf)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    self.max_bins = int(self.max_bins)
    self.l2_regularization = float(self.l2_regularization)
    self.tol = float(self.tol)
    if check_none(self.scoring):
        self.scoring = None

    if self.early_stop == "off":
        self.n_iter_no_change = 0
        self.validation_fraction_ = None
    elif self.early_stop == "train":
        self.n_iter_no_change = int(self.n_iter_no_change)
        self.validation_fraction_ = None
    elif self.early_stop == "valid":
        self.n_iter_no_change = int(self.n_iter_no_change)
        self.validation_fraction = float(self.validation_fraction)
        # Make sure the validation set is large enough to contain at least
        # one sample per class (an int here is an absolute sample count).
        n_classes = len(np.unique(Y))
        if self.validation_fraction * X.shape[0] < n_classes:
            self.validation_fraction_ = n_classes
        else:
            self.validation_fraction_ = self.validation_fraction
    else:
        raise ValueError("early_stop should be either off, train or valid")

    self.verbose = int(self.verbose)

    self.estimator = sklearn.ensemble.HistGradientBoostingClassifier(
        loss=self.loss,
        learning_rate=self.learning_rate,
        max_iter=self.max_iter,
        min_samples_leaf=self.min_samples_leaf,
        max_depth=self.max_depth,
        max_leaf_nodes=self.max_leaf_nodes,
        max_bins=self.max_bins,
        l2_regularization=self.l2_regularization,
        tol=self.tol,
        scoring=self.scoring,
        n_iter_no_change=self.n_iter_no_change,
        validation_fraction=self.validation_fraction_,
        verbose=self.verbose,
        random_state=self.random_state,
    )
    self.estimator.fit(X, Y)
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    import sklearn.ensemble

    # Special fix for gradient boosting!
    if isinstance(X, np.ndarray):
        X = np.ascontiguousarray(X, dtype=X.dtype)
    if refit:
        self.estimator = None

    if self.estimator is None:
        self.learning_rate = float(self.learning_rate)
        self.n_estimators = int(self.n_estimators)
        self.subsample = float(self.subsample)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.max_features = float(self.max_features)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.verbose = int(self.verbose)

        self.estimator = sklearn.ensemble.GradientBoostingClassifier(
            loss=self.loss,
            learning_rate=self.learning_rate,
            n_estimators=n_iter,
            subsample=self.subsample,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_depth=self.max_depth,
            criterion=self.criterion,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=True,
        )
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)

    # Mark the component as done once the estimator budget is exhausted.
    if self.estimator.n_estimators >= self.n_estimators:
        self.fully_fit_ = True

    return self
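# Minimal sketch of the warm-start pattern the method above builds on: with
# warm_start=True, increasing n_estimators and calling fit() again trains
# only the additional stages instead of rebuilding the ensemble from scratch.
# The dataset and sizes here are illustrative only.
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X_demo, y_demo = make_classification(n_samples=200, random_state=0)
gbc = GradientBoostingClassifier(n_estimators=10, warm_start=True,
                                 random_state=0)
gbc.fit(X_demo, y_demo)   # fits stages 1-10
gbc.n_estimators += 10
gbc.fit(X_demo, y_demo)   # fits only stages 11-20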
def fit(self, X, y):
    import sklearn.ensemble
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

    # Special fix for gradient boosting!
    if isinstance(X, np.ndarray):
        X = np.ascontiguousarray(X, dtype=X.dtype)

    self.learning_rate = float(self.learning_rate)
    self.max_iter = int(self.max_iter)
    self.min_samples_leaf = int(self.min_samples_leaf)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    self.max_bins = int(self.max_bins)
    self.l2_regularization = float(self.l2_regularization)
    self.tol = float(self.tol)
    if check_none(self.scoring):
        self.scoring = None

    if self.early_stop == "off":
        self.n_iter_no_change = 0
        self.validation_fraction = None
    elif self.early_stop == "train":
        self.n_iter_no_change = int(self.n_iter_no_change)
        self.validation_fraction = None
    elif self.early_stop == "valid":
        self.n_iter_no_change = int(self.n_iter_no_change)
        self.validation_fraction = float(self.validation_fraction)
    else:
        raise ValueError("early_stop should be either off, train or valid")

    self.verbose = int(self.verbose)

    self.estimator = sklearn.ensemble.HistGradientBoostingRegressor(
        loss=self.loss,
        learning_rate=self.learning_rate,
        max_iter=self.max_iter,
        min_samples_leaf=self.min_samples_leaf,
        max_depth=self.max_depth,
        max_leaf_nodes=self.max_leaf_nodes,
        max_bins=self.max_bins,
        l2_regularization=self.l2_regularization,
        tol=self.tol,
        scoring=self.scoring,
        n_iter_no_change=self.n_iter_no_change,
        validation_fraction=self.validation_fraction,
        verbose=self.verbose,
        random_state=self.random_state,
    )
    self.estimator.fit(X, y)
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestClassifier

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        if self.max_features not in ("sqrt", "log2", "auto"):
            max_features = int(X.shape[1] ** float(self.max_features))
        else:
            max_features = self.max_features

        self.bootstrap = check_for_bool(self.bootstrap)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # Initial fit of only `n_iter` trees; warm_start=True lets the
        # increment branch below grow the forest on later calls.
        self.estimator = RandomForestClassifier(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            class_weight=self.class_weight,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if self.criterion not in ("mse", "friedman_mse", "mae"):
            raise ValueError("'criterion' is not in ('mse', 'friedman_mse', "
                             "'mae'): %s" % self.criterion)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_samples_split = int(self.min_samples_split)
        self.max_features = float(self.max_features)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
        self.oob_score = check_for_bool(self.oob_score)
        self.bootstrap = check_for_bool(self.bootstrap)
        self.n_jobs = int(self.n_jobs)
        self.verbose = int(self.verbose)

        self.estimator = ETR(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            min_impurity_decrease=self.min_impurity_decrease,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y)
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestClassifier

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        if self.max_features not in ("sqrt", "log2", "auto"):
            max_features = int(X.shape[1] ** float(self.max_features))
        else:
            max_features = self.max_features

        self.bootstrap = check_for_bool(self.bootstrap)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # initial fit of only increment trees
        self.estimator = RandomForestClassifier(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            class_weight=self.class_weight,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def fit(self, X, Y):
    from sklearn.ensemble import ExtraTreesRegressor
    from sklearn.feature_selection import SelectFromModel

    self.n_estimators = int(self.n_estimators)
    self.min_samples_leaf = int(self.min_samples_leaf)
    self.min_samples_split = int(self.min_samples_split)
    self.max_features = float(self.max_features)
    self.bootstrap = check_for_bool(self.bootstrap)
    self.n_jobs = int(self.n_jobs)
    self.verbose = int(self.verbose)

    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)
    self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

    num_features = X.shape[1]
    max_features = int(
        float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))

    estimator = ExtraTreesRegressor(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        random_state=self.random_state)
    estimator.fit(X, Y)

    self.preprocessor = SelectFromModel(estimator=estimator,
                                        threshold='mean',
                                        prefit=True)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestRegressor

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.max_features = float(self.max_features)
        self.bootstrap = check_for_bool(self.bootstrap)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        self.estimator = RandomForestRegressor(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=self.max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y)
    return self
def fit(self, X, Y):
    import sklearn.svm
    from sklearn.feature_selection import SelectFromModel

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)
    if check_none(self.class_weight):
        self.class_weight = None

    estimator = sklearn.svm.LinearSVC(
        penalty=self.penalty,
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        class_weight=self.class_weight,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        multi_class=self.multi_class,
        random_state=self.random_state)
    estimator.fit(X, Y)

    self.preprocessor = SelectFromModel(estimator=estimator,
                                        threshold='mean',
                                        prefit=True)
    return self
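# Minimal usage sketch for the SelectFromModel pattern above: fit the
# estimator first, wrap it with prefit=True, and transform() then keeps only
# the features whose importance (here derived from |coef_|) exceeds the mean
# importance. Data and names below are illustrative only.
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC

X_demo, y_demo = make_classification(n_samples=100, n_features=20,
                                     random_state=0)
svc = LinearSVC(penalty='l1', dual=False, random_state=0).fit(X_demo, y_demo)
selector = SelectFromModel(estimator=svc, threshold='mean', prefit=True)
X_reduced = selector.transform(X_demo)  # keeps features above the threshold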
def fit(self, X, Y):
    import sklearn.discriminant_analysis
    import sklearn.multiclass

    if check_none(self.shrinkage):
        self.shrinkage_ = None
        solver = 'svd'
    elif self.shrinkage == "auto":
        self.shrinkage_ = 'auto'
        solver = 'lsqr'
    elif self.shrinkage == "manual":
        self.shrinkage_ = float(self.shrinkage_factor)
        solver = 'lsqr'
    else:
        raise ValueError(self.shrinkage)

    self.tol = float(self.tol)
    estimator = sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
        shrinkage=self.shrinkage_, tol=self.tol, solver=solver)

    if len(Y.shape) == 2 and Y.shape[1] > 1:
        self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator,
                                                                n_jobs=1)
    else:
        self.estimator = estimator

    self.estimator.fit(X, Y)
    return self
def fit(self, X, Y):
    import sklearn.svm
    import sklearn.multiclass

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)
    if check_none(self.class_weight):
        self.class_weight = None

    estimator = sklearn.svm.LinearSVC(
        penalty=self.penalty,
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        class_weight=self.class_weight,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        multi_class=self.multi_class,
        random_state=self.random_state)

    if len(Y.shape) == 2 and Y.shape[1] > 1:
        self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator,
                                                                n_jobs=1)
    else:
        self.estimator = estimator

    self.estimator.fit(X, Y)
    return self
def _fit(self, X, Y=None):
    import sklearn.decomposition

    self.whiten = check_for_bool(self.whiten)
    if check_none(self.n_components):
        self.n_components = None
    else:
        self.n_components = int(self.n_components)

    self.preprocessor = sklearn.decomposition.FastICA(
        n_components=self.n_components,
        algorithm=self.algorithm,
        fun=self.fun,
        whiten=self.whiten,
        random_state=self.random_state)

    # Make the RuntimeWarning an Exception!
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "error", message='array must not contain infs or NaNs')
        try:
            return self.preprocessor.fit_transform(X)
        except ValueError as e:
            if 'array must not contain infs or NaNs' in e.args[0]:
                raise ValueError(
                    "Bug in scikit-learn: "
                    "https://github.com/scikit-learn/scikit-learn/pull/2738")
            raise  # do not silently swallow unrelated ValueErrors
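# Standalone sketch of the warnings-as-errors trick used above: inside
# warnings.catch_warnings(), filterwarnings("error", message=...) promotes a
# matching warning to an exception so it can be caught and handled
# explicitly. The warning text here is made up for the demo.
import warnings


def strict_call():
    with warnings.catch_warnings():
        warnings.filterwarnings("error", message="badly conditioned")
        try:
            warnings.warn("badly conditioned input")  # now raises
        except UserWarning:
            print("caught the promoted warning")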
def fit(self, X, Y, sample_weight=None):
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel

    self.n_estimators = int(self.n_estimators)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)

    self.bootstrap = check_for_bool(self.bootstrap)
    self.n_jobs = int(self.n_jobs)
    self.min_impurity_decrease = float(self.min_impurity_decrease)
    self.min_samples_leaf = int(self.min_samples_leaf)
    self.min_samples_split = int(self.min_samples_split)
    self.verbose = int(self.verbose)

    max_features = int(X.shape[1] ** float(self.max_features))
    estimator = ExtraTreesClassifier(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
        class_weight=self.class_weight)
    estimator.fit(X, Y, sample_weight=sample_weight)

    self.preprocessor = SelectFromModel(estimator=estimator,
                                        threshold='mean',
                                        prefit=True)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestRegressor

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.max_features = float(self.max_features)
        self.bootstrap = check_for_bool(self.bootstrap)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # Start from an empty forest; the unconditional increment below adds
        # the first n_iter trees, so creation and growth share one code path.
        self.estimator = RandomForestRegressor(
            n_estimators=0,
            criterion=self.criterion,
            max_features=self.max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            warm_start=True)

    self.estimator.n_estimators += n_iter
    self.estimator.n_estimators = min(self.estimator.n_estimators,
                                      self.n_estimators)

    self.estimator.fit(X, y)
    return self
def fit(self, X, Y):
    import sklearn.svm

    # Size the LibSVM kernel cache (in MB) from the remaining memory budget.
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            soft /= 1024 * 1024
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            cache_size = (soft - maxrss) / 1.5
            if cache_size < 0:
                cache_size = 200
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    self.max_iter = float(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)
    if check_none(self.class_weight):
        self.class_weight = None

    self.estimator = sklearn.svm.SVC(C=self.C,
                                     kernel=self.kernel,
                                     degree=self.degree,
                                     gamma=self.gamma,
                                     coef0=self.coef0,
                                     shrinking=self.shrinking,
                                     tol=self.tol,
                                     class_weight=self.class_weight,
                                     max_iter=self.max_iter,
                                     random_state=self.random_state,
                                     cache_size=cache_size,
                                     decision_function_shape='ovr')
    self.estimator.fit(X, Y)
    return self
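# Standalone sketch of the kernel-cache heuristic shared by the SVM wrappers
# in this section, assuming a POSIX system where resource.RLIMIT_AS is
# enforced. The 1.5 divisor (i.e. 2/3 of the remaining budget) and the 200 MB
# fallback follow the snippets; guess_cache_size is a hypothetical helper.
import resource


def guess_cache_size(default_mb=200):
    try:
        soft, _hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft <= 0:  # no limit set (RLIM_INFINITY is reported as -1)
            return default_mb
        soft_mb = soft / (1024 * 1024)                       # limit in MB
        used_mb = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
        cache_mb = (soft_mb - used_mb) / 1.5                 # leave headroom
        return cache_mb if cache_mb > 0 else default_mb
    except Exception:
        return default_mb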
def fit(self, X, y=None):
    import sklearn.discriminant_analysis

    if check_none(self.shrinkage):
        self.shrinkage = None
    else:
        self.shrinkage = float(self.shrinkage)
    self.tol = float(self.tol)

    self.preprocessor = \
        sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
            shrinkage=self.shrinkage,
            solver=self.solver,
            tol=self.tol,
        )
    self.preprocessor.fit(X, y)
    return self
def fit(self, X, Y):
    import sklearn.preprocessing
    import sklearn.svm

    # Size the LibSVM kernel cache (in MB) from the remaining memory budget.
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            soft /= 1024 * 1024
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            cache_size = (soft - maxrss) / 1.5
            if cache_size < 0:
                cache_size = 200
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    self.epsilon = float(self.epsilon)
    self.tol = float(self.tol)
    self.shrinking = check_for_bool(self.shrinking)
    self.degree = int(self.degree)
    self.gamma = float(self.gamma)
    if check_none(self.coef0):
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.verbose = int(self.verbose)
    self.max_iter = int(self.max_iter)

    self.estimator = sklearn.svm.SVR(
        kernel=self.kernel,
        C=self.C,
        epsilon=self.epsilon,
        tol=self.tol,
        shrinking=self.shrinking,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        cache_size=cache_size,
        verbose=self.verbose,
        max_iter=self.max_iter,
    )

    # The epsilon-insensitive loss depends on the absolute target scale,
    # so standardize Y before fitting.
    self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
    self.scaler.fit(Y.reshape((-1, 1)))
    Y_scaled = self.scaler.transform(Y.reshape((-1, 1))).ravel()
    self.estimator.fit(X, Y_scaled)
    return self
def _fit(self, X, y=None):
    self.use_minimum_fraction = check_for_bool(self.use_minimum_fraction)
    if self.use_minimum_fraction is False:
        self.minimum_fraction = None
    else:
        self.minimum_fraction = float(self.minimum_fraction)

    if check_none(self.categorical_features):
        categorical_features = []
    else:
        categorical_features = self.categorical_features

    self.preprocessor = autosklearn.pipeline.implementations.OneHotEncoder \
        .OneHotEncoder(minimum_fraction=self.minimum_fraction,
                       categorical_features=categorical_features,
                       sparse=True)

    return self.preprocessor.fit_transform(X)
def fit(self, X, y=None):
    """Fit the preprocessor."""
    import sklearn.discriminant_analysis

    if check_none(self.shrinkage):
        self.shrinkage = None
    else:
        self.shrinkage = float(self.shrinkage)
    self.n_components = int(self.n_components)
    self.tol = float(self.tol)

    self.preprocessor = (
        sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
            shrinkage=self.shrinkage,
            solver=self.solver,
            n_components=self.n_components,
            tol=self.tol,
        ))
    self.preprocessor.fit(X, y)
    return self
def fit(self, X, Y):
    import sklearn.svm

    # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM.
    # The cache size is calculated as 2/3 of the available memory, which in
    # turn is the memory limit imposed on the process minus the memory
    # already in use.
    try:
        # Retrieve memory limits imposed on the process
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            # Convert limit to units of megabytes
            soft /= 1024 * 1024
            # Retrieve memory used by this process
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            # On macOS, resource.getrusage reports MaxRSS in bytes; on other
            # platforms it is in kilobytes
            if sys.platform == 'darwin':
                maxrss = maxrss / 1024
            cache_size = (soft - maxrss) / 1.5
            if cache_size < 0:
                cache_size = 200
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    self.max_iter = float(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)
    if check_none(self.class_weight):
        self.class_weight = None

    self.estimator = sklearn.svm.SVC(C=self.C,
                                     kernel=self.kernel,
                                     degree=self.degree,
                                     gamma=self.gamma,
                                     coef0=self.coef0,
                                     shrinking=self.shrinking,
                                     tol=self.tol,
                                     class_weight=self.class_weight,
                                     max_iter=self.max_iter,
                                     random_state=self.random_state,
                                     cache_size=cache_size,
                                     decision_function_shape='ovr')
    self.estimator.fit(X, Y)
    return self
def fit(self, X, Y):
    import sklearn.preprocessing
    import sklearn.svm

    # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM.
    # The cache size is calculated as 2/3 of the available memory, which in
    # turn is the memory limit imposed on the process minus the memory
    # already in use.
    try:
        # Retrieve memory limits imposed on the process
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            # Convert limit to units of megabytes
            soft /= 1024 * 1024
            # Retrieve memory used by this process
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            # On macOS, resource.getrusage reports MaxRSS in bytes; on other
            # platforms it is in kilobytes
            if sys.platform == 'darwin':
                maxrss = maxrss / 1024
            cache_size = (soft - maxrss) / 1.5
            if cache_size < 0:
                cache_size = 200
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    self.epsilon = float(self.epsilon)
    self.tol = float(self.tol)
    self.shrinking = check_for_bool(self.shrinking)
    self.degree = int(self.degree)
    self.gamma = float(self.gamma)
    if check_none(self.coef0):
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.verbose = int(self.verbose)
    self.max_iter = int(self.max_iter)

    self.estimator = sklearn.svm.SVR(
        kernel=self.kernel,
        C=self.C,
        epsilon=self.epsilon,
        tol=self.tol,
        shrinking=self.shrinking,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        cache_size=cache_size,
        verbose=self.verbose,
        max_iter=self.max_iter,
    )

    # The epsilon-insensitive loss depends on the absolute target scale,
    # so standardize Y before fitting.
    self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
    self.scaler.fit(Y.reshape((-1, 1)))
    Y_scaled = self.scaler.transform(Y.reshape((-1, 1))).ravel()
    self.estimator.fit(X, Y_scaled)
    return self
def iterative_fit(self, X, y, n_iter=2, refit=False):
    """Set n_iter=2 for the same reason as for SGD."""
    import sklearn.ensemble
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.learning_rate = float(self.learning_rate)
        self.max_iter = int(self.max_iter)
        self.min_samples_leaf = int(self.min_samples_leaf)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.max_bins = int(self.max_bins)
        self.l2_regularization = float(self.l2_regularization)
        self.verbose = int(self.verbose)
        n_iter = int(np.ceil(n_iter))

        # initial fit of only increment trees
        self.estimator = sklearn.ensemble.HistGradientBoostingClassifier(
            loss=self.loss,
            learning_rate=self.learning_rate,
            max_iter=n_iter,
            min_samples_leaf=self.min_samples_leaf,
            max_depth=self.max_depth,
            max_leaf_nodes=self.max_leaf_nodes,
            max_bins=self.max_bins,
            l2_regularization=self.l2_regularization,
            tol=None,
            scoring=self.scoring,
            n_iter_no_change=0,
            validation_fraction=None,
            verbose=self.verbose,
            warm_start=True,
            random_state=self.random_state,
        )
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter,
                                      self.max_iter)

    self.estimator.fit(X, y)

    # Early stopping is disabled (n_iter_no_change=0), so the estimator must
    # run for exactly the requested number of iterations.
    if self.estimator.max_iter > self.estimator.n_iter_:
        raise ValueError(
            "Estimator only ran for %d iterations, %d were requested"
            % (self.estimator.n_iter_, self.estimator.max_iter))

    if self.estimator.max_iter >= self.max_iter:
        self.fully_fit_ = True

    return self
def iterative_fit(self, X, y, n_iter=2, refit=False):
    """Set n_iter=2 for the same reason as for SGD."""
    import sklearn.ensemble
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.learning_rate = float(self.learning_rate)
        self.max_iter = int(self.max_iter)
        self.min_samples_leaf = int(self.min_samples_leaf)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.max_bins = int(self.max_bins)
        self.l2_regularization = float(self.l2_regularization)
        self.tol = float(self.tol)
        if check_none(self.scoring):
            self.scoring = None

        if self.early_stop == "off":
            self.n_iter_no_change = 0
            self.validation_fraction_ = None
        elif self.early_stop == "train":
            self.n_iter_no_change = int(self.n_iter_no_change)
            self.validation_fraction_ = None
        elif self.early_stop == "valid":
            self.n_iter_no_change = int(self.n_iter_no_change)
            self.validation_fraction_ = float(self.validation_fraction)
        else:
            raise ValueError(
                "early_stop should be either off, train or valid")

        self.verbose = int(self.verbose)
        n_iter = int(np.ceil(n_iter))

        self.estimator = sklearn.ensemble.HistGradientBoostingRegressor(
            loss=self.loss,
            learning_rate=self.learning_rate,
            max_iter=n_iter,
            min_samples_leaf=self.min_samples_leaf,
            max_depth=self.max_depth,
            max_leaf_nodes=self.max_leaf_nodes,
            max_bins=self.max_bins,
            l2_regularization=self.l2_regularization,
            tol=self.tol,
            scoring=self.scoring,
            n_iter_no_change=self.n_iter_no_change,
            validation_fraction=self.validation_fraction_,
            verbose=self.verbose,
            warm_start=True,
            random_state=self.random_state,
        )
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter,
                                      self.max_iter)

    self.estimator.fit(X, y)

    if (self.estimator.max_iter >= self.max_iter
            or self.estimator.max_iter > self.estimator.n_iter_):
        self.fully_fit_ = True

    return self
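# Sketch of how the iterative_fit components above are typically driven:
# request a few iterations per call until the component reports completion.
# This loop only terminates for wrappers that set fully_fit_ (the gradient
# boosting ones here); fit_iteratively is a hypothetical helper name.
def fit_iteratively(component, X, y, n_iter_per_call=2):
    component.iterative_fit(X, y, n_iter=n_iter_per_call, refit=True)
    while not getattr(component, 'fully_fit_', False):
        component.iterative_fit(X, y, n_iter=n_iter_per_call)
    return component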