def fit(self, X, Y):
    import sklearn.svm
    from sklearn.feature_selection import SelectFromModel

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)

    if check_none(self.class_weight):
        self.class_weight = None

    estimator = sklearn.svm.LinearSVC(
        penalty=self.penalty,
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        class_weight=self.class_weight,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        multi_class=self.multi_class,
        random_state=self.random_state)
    estimator.fit(X, Y)

    self.preprocessor = SelectFromModel(estimator=estimator,
                                        threshold='mean',
                                        prefit=True)
    return self
def fit(self, X, Y):
    import sklearn.svm
    import sklearn.multiclass

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)

    if check_none(self.class_weight):
        self.class_weight = None

    estimator = sklearn.svm.LinearSVC(penalty=self.penalty,
                                      loss=self.loss,
                                      dual=self.dual,
                                      tol=self.tol,
                                      C=self.C,
                                      class_weight=self.class_weight,
                                      fit_intercept=self.fit_intercept,
                                      intercept_scaling=self.intercept_scaling,
                                      multi_class=self.multi_class,
                                      random_state=self.random_state)

    # Fallback for multilabel classification
    if len(Y.shape) == 2 and Y.shape[1] > 1:
        self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator,
                                                                n_jobs=1)
    else:
        self.estimator = estimator
    self.estimator.fit(X, Y)
    return self
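# --- Usage sketch (illustrative, not part of the original component) ---
# With a 2-D indicator target, the branch above wraps LinearSVC in
# OneVsRestClassifier; with a 1-D target LinearSVC is fit directly.
# Data and hyperparameter values below are made up for the example.
import numpy as np
import sklearn.multiclass
import sklearn.svm

rng = np.random.RandomState(0)
X_demo = rng.rand(20, 4)
Y_multilabel = rng.randint(0, 2, size=(20, 3))  # indicator matrix -> OvR

ovr = sklearn.multiclass.OneVsRestClassifier(
    sklearn.svm.LinearSVC(C=1.0, tol=1e-4), n_jobs=1)
ovr.fit(X_demo, Y_multilabel)
print(ovr.predict(X_demo[:2]).shape)  # (2, 3): one column per label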
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesClassifier as ETC

    if refit:
        self.estimator = None

    if self.estimator is None:
        max_features = int(X.shape[1] ** float(self.max_features))
        if self.criterion not in ("gini", "entropy"):
            raise ValueError("'criterion' is not in ('gini', 'entropy'): "
                             "%s" % self.criterion)

        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)

        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_samples_split = int(self.min_samples_split)
        self.max_features = float(self.max_features)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
        self.oob_score = check_for_bool(self.oob_score)
        self.bootstrap = check_for_bool(self.bootstrap)
        self.n_jobs = int(self.n_jobs)
        self.verbose = int(self.verbose)

        self.estimator = ETC(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            min_impurity_decrease=self.min_impurity_decrease,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            class_weight=self.class_weight,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import ExtraTreesRegressor as ETR

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if self.criterion not in ("mse", "friedman_mse", "mae"):
            raise ValueError("'criterion' is not in ('mse', 'friedman_mse', "
                             "'mae'): %s" % self.criterion)

        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)

        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_samples_split = int(self.min_samples_split)
        self.max_features = float(self.max_features)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
        self.oob_score = check_for_bool(self.oob_score)
        self.bootstrap = check_for_bool(self.bootstrap)
        self.n_jobs = int(self.n_jobs)
        self.verbose = int(self.verbose)

        self.estimator = ETR(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            bootstrap=self.bootstrap,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            min_impurity_decrease=self.min_impurity_decrease,
            oob_score=self.oob_score,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=self.random_state,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y)
    return self
def fit(self, X, Y):
    import sklearn.preprocessing

    self.degree = int(self.degree)
    self.interaction_only = check_for_bool(self.interaction_only)
    self.include_bias = check_for_bool(self.include_bias)

    self.preprocessor = sklearn.preprocessing.PolynomialFeatures(
        degree=self.degree,
        interaction_only=self.interaction_only,
        include_bias=self.include_bias)
    self.preprocessor.fit(X, Y)
    return self
def _fit(self, X, Y=None):
    import warnings
    import sklearn.decomposition

    self.whiten = check_for_bool(self.whiten)
    if check_none(self.n_components):
        self.n_components = None
    else:
        self.n_components = int(self.n_components)

    self.preprocessor = sklearn.decomposition.FastICA(
        n_components=self.n_components,
        algorithm=self.algorithm,
        fun=self.fun,
        whiten=self.whiten,
        random_state=self.random_state)

    # Make the RuntimeWarning an Exception!
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "error", message='array must not contain infs or NaNs')
        try:
            return self.preprocessor.fit_transform(X)
        except ValueError as e:
            if 'array must not contain infs or NaNs' in e.args[0]:
                raise ValueError(
                    "Bug in scikit-learn: "
                    "https://github.com/scikit-learn/scikit-learn/pull/2738")
            # Re-raise anything unexpected instead of swallowing it
            raise
def fit(self, X, Y):
    import sklearn.linear_model

    self.n_iter = int(self.n_iter)
    self.tol = float(self.tol)
    self.alpha_1 = float(self.alpha_1)
    self.alpha_2 = float(self.alpha_2)
    self.lambda_1 = float(self.lambda_1)
    self.lambda_2 = float(self.lambda_2)
    self.threshold_lambda = float(self.threshold_lambda)
    self.fit_intercept = check_for_bool(self.fit_intercept)

    self.estimator = sklearn.linear_model.ARDRegression(
        n_iter=self.n_iter,
        tol=self.tol,
        alpha_1=self.alpha_1,
        alpha_2=self.alpha_2,
        lambda_1=self.lambda_1,
        lambda_2=self.lambda_2,
        compute_score=False,
        threshold_lambda=self.threshold_lambda,
        fit_intercept=self.fit_intercept,
        normalize=False,
        copy_X=False,
        verbose=False)
    self.estimator.fit(X, Y)
    return self
def fit(self, X, y):
    import sklearn.naive_bayes
    import scipy.sparse

    self.fit_prior = check_for_bool(self.fit_prior)
    self.alpha = float(self.alpha)
    self.n_iter = 0
    self.fully_fit_ = False
    self.estimator = sklearn.naive_bayes.MultinomialNB(
        alpha=self.alpha, fit_prior=self.fit_prior)
    self.classes_ = np.unique(y.astype(int))

    # Because the pipeline guarantees that each feature is positive,
    # clip all values below zero to zero
    if scipy.sparse.issparse(X):
        X.data[X.data < 0] = 0.0
    else:
        X[X < 0] = 0.0

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
    self.estimator.fit(X, y)
    return self
def _fit(self, X, Y=None):
    import sklearn.ensemble

    self.n_estimators = int(self.n_estimators)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)
    self.min_samples_split = int(self.min_samples_split)
    self.min_samples_leaf = int(self.min_samples_leaf)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
    self.bootstrap = check_for_bool(self.bootstrap)

    self.preprocessor = sklearn.ensemble.RandomTreesEmbedding(
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        max_leaf_nodes=self.max_leaf_nodes,
        sparse_output=self.sparse_output,
        n_jobs=self.n_jobs,
        random_state=self.random_state)
    self.preprocessor.fit(X, Y)
    return self
def __init__(self, n_estimators, criterion, min_samples_leaf,
             min_samples_split, max_features, bootstrap, max_leaf_nodes,
             max_depth, min_weight_fraction_leaf, min_impurity_decrease,
             oob_score=False, n_jobs=1, random_state=None, verbose=0,
             class_weight=None):
    self.n_estimators = int(n_estimators)
    self.estimator_increment = 10
    if criterion not in ("gini", "entropy"):
        raise ValueError("'criterion' is not in ('gini', 'entropy'): "
                         "%s" % criterion)
    self.criterion = criterion

    if check_none(max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(max_depth)
    if check_none(max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(max_leaf_nodes)

    self.min_samples_leaf = int(min_samples_leaf)
    self.min_samples_split = int(min_samples_split)
    self.max_features = float(max_features)
    self.bootstrap = check_for_bool(bootstrap)
    self.min_weight_fraction_leaf = float(min_weight_fraction_leaf)
    self.min_impurity_decrease = float(min_impurity_decrease)
    self.oob_score = oob_score
    self.n_jobs = int(n_jobs)
    self.random_state = random_state
    self.verbose = int(verbose)
    self.class_weight = class_weight
    self.estimator = None
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestClassifier

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)

        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        if self.max_features not in ("sqrt", "log2", "auto"):
            max_features = int(X.shape[1] ** float(self.max_features))
        else:
            max_features = self.max_features

        self.bootstrap = check_for_bool(self.bootstrap)

        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)

        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # initial fit of only increment trees; pass the configured
        # hyperparameters and enable warm_start so later calls grow the
        # forest instead of refitting it from scratch
        self.estimator = RandomForestClassifier(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            class_weight=self.class_weight,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestClassifier

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)

        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

        if self.max_features not in ("sqrt", "log2", "auto"):
            max_features = int(X.shape[1] ** float(self.max_features))
        else:
            max_features = self.max_features

        self.bootstrap = check_for_bool(self.bootstrap)

        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)

        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # initial fit of only increment trees
        self.estimator = RandomForestClassifier(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            class_weight=self.class_weight,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y, sample_weight=sample_weight)
    return self
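# --- Usage sketch (illustrative, not from the original source) ---
# The warm-start pattern the method above relies on, shown with plain
# scikit-learn: bumping n_estimators on a warm-started forest and calling
# fit again trains only the newly added trees. Data and the budget/step
# values are made up for the example.
import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.RandomState(0)
X_demo = rng.rand(100, 5)
y_demo = rng.randint(0, 2, size=100)

budget, step = 64, 8
forest = RandomForestClassifier(n_estimators=step, warm_start=True,
                                random_state=0)
forest.fit(X_demo, y_demo)
while forest.n_estimators < budget:
    forest.n_estimators = min(forest.n_estimators + step, budget)
    forest.fit(X_demo, y_demo)  # grows the ensemble, keeps existing trees
print(len(forest.estimators_))  # 64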
def fit(self, X, Y):
    import sklearn.svm

    self.C = float(self.C)
    self.tol = float(self.tol)
    self.epsilon = float(self.epsilon)
    self.dual = check_for_bool(self.dual)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.intercept_scaling = float(self.intercept_scaling)

    self.estimator = sklearn.svm.LinearSVR(
        epsilon=self.epsilon,
        loss=self.loss,
        dual=self.dual,
        tol=self.tol,
        C=self.C,
        fit_intercept=self.fit_intercept,
        intercept_scaling=self.intercept_scaling,
        random_state=self.random_state)
    self.estimator.fit(X, Y)
    return self
def fit(self, X, Y=None):
    import sklearn.decomposition

    n_components = float(self.keep_variance)
    self.whiten = check_for_bool(self.whiten)

    self.preprocessor = sklearn.decomposition.PCA(
        n_components=n_components, whiten=self.whiten, copy=True)
    self.preprocessor.fit(X)

    if not np.isfinite(self.preprocessor.components_).all():
        raise ValueError("PCA found non-finite components.")
    return self
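# --- Usage sketch (illustrative, not from the original source) ---
# What the float n_components above means: PCA keeps the smallest number of
# components whose explained variance ratios sum to at least keep_variance.
# Data and the 0.9 threshold are made up for the example.
import numpy as np
import sklearn.decomposition

rng = np.random.RandomState(0)
X_demo = rng.rand(200, 10)

pca = sklearn.decomposition.PCA(n_components=0.9, whiten=False, copy=True)
X_reduced = pca.fit_transform(X_demo)
print(X_reduced.shape[1], pca.explained_variance_ratio_.sum())  # sum >= 0.9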
def fit(self, X, Y):
    from sklearn.ensemble import ExtraTreesRegressor
    from sklearn.feature_selection import SelectFromModel

    self.n_estimators = int(self.n_estimators)
    self.min_samples_leaf = int(self.min_samples_leaf)
    self.min_samples_split = int(self.min_samples_split)
    self.max_features = float(self.max_features)
    self.bootstrap = check_for_bool(self.bootstrap)
    self.n_jobs = int(self.n_jobs)
    self.verbose = int(self.verbose)

    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)
    self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)

    num_features = X.shape[1]
    max_features = int(
        float(self.max_features) * (np.log(num_features) + 1))
    # Use at most half of the features
    max_features = max(1, min(int(X.shape[1] / 2), max_features))

    estimator = ExtraTreesRegressor(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        min_weight_fraction_leaf=self.min_weight_fraction_leaf,
        random_state=self.random_state)
    estimator.fit(X, Y)

    self.preprocessor = SelectFromModel(estimator=estimator,
                                        threshold='mean',
                                        prefit=True)
    return self
def fit(self, X, Y):
    import resource
    import sklearn.svm

    # Rough kernel-cache size (in MB) for LibSVM: a share of the memory
    # still free under the address-space limit, with a 200 MB fallback
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            soft /= 1024 * 1024
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            cache_size = (soft - maxrss) / 1.5
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    self.max_iter = int(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)

    if check_none(self.class_weight):
        self.class_weight = None

    self.estimator = sklearn.svm.SVC(C=self.C,
                                     kernel=self.kernel,
                                     degree=self.degree,
                                     gamma=self.gamma,
                                     coef0=self.coef0,
                                     shrinking=self.shrinking,
                                     tol=self.tol,
                                     class_weight=self.class_weight,
                                     max_iter=self.max_iter,
                                     random_state=self.random_state,
                                     cache_size=cache_size,
                                     decision_function_shape='ovr')
    self.estimator.fit(X, Y)
    return self
def fit(self, X, Y):
    import sklearn.linear_model

    self.alpha = float(self.alpha)
    self.fit_intercept = check_for_bool(self.fit_intercept)
    self.tol = float(self.tol)

    self.estimator = sklearn.linear_model.Ridge(
        alpha=self.alpha,
        fit_intercept=self.fit_intercept,
        tol=self.tol,
        copy_X=True,
        normalize=False,
        random_state=self.random_state)
    self.estimator.fit(X, Y)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    import sklearn.naive_bayes
    import scipy.sparse

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fit_prior = check_for_bool(self.fit_prior)
        self.alpha = float(self.alpha)
        self.n_iter = 0
        self.fully_fit_ = False
        self.estimator = sklearn.naive_bayes.MultinomialNB(
            alpha=self.alpha, fit_prior=self.fit_prior)
        self.classes_ = np.unique(y.astype(int))

    # Because the pipeline guarantees that each feature is positive,
    # clip all values below zero to zero
    if scipy.sparse.issparse(X):
        X.data[X.data < 0] = 0.0
    else:
        X[X < 0] = 0.0

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator.n_iter = self.n_iter
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
        self.estimator.fit(X, y)
        self.fully_fit_ = True
    else:
        for _ in range(n_iter):
            start = min(self.n_iter * 1000, y.shape[0])
            stop = min((self.n_iter + 1) * 1000, y.shape[0])
            if X[start:stop].shape[0] == 0:
                self.fully_fit_ = True
                break
            self.estimator.partial_fit(X[start:stop], y[start:stop],
                                       self.classes_)
            self.n_iter += 1
            if stop >= len(y):
                self.fully_fit_ = True
                break
    return self
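# --- Usage sketch (illustrative, not from the original source) ---
# The batching scheme above in isolation: a cursor advances in 1000-row
# windows and each window is fed to partial_fit, so one "iteration" touches
# at most 1000 samples. Data below is made up for the example.
import numpy as np
import sklearn.naive_bayes

rng = np.random.RandomState(0)
X_demo = rng.rand(2500, 4)            # non-negative, as MultinomialNB needs
y_demo = rng.randint(0, 3, size=2500)
classes = np.unique(y_demo)

nb = sklearn.naive_bayes.MultinomialNB(alpha=1.0)
cursor = 0
while cursor * 1000 < len(y_demo):
    start = cursor * 1000
    stop = min((cursor + 1) * 1000, len(y_demo))
    nb.partial_fit(X_demo[start:stop], y_demo[start:stop], classes=classes)
    cursor += 1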
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestRegressor

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.max_features = float(self.max_features)
        self.bootstrap = check_for_bool(self.bootstrap)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        self.estimator = RandomForestRegressor(
            n_estimators=n_iter,
            criterion=self.criterion,
            max_features=self.max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            warm_start=True)
    else:
        self.estimator.n_estimators += n_iter
        self.estimator.n_estimators = min(self.estimator.n_estimators,
                                          self.n_estimators)

    self.estimator.fit(X, y)
    return self
def fit(self, X, y):
    import sklearn.naive_bayes

    self.fit_prior = check_for_bool(self.fit_prior)
    self.estimator = sklearn.naive_bayes.BernoulliNB(
        alpha=self.alpha, fit_prior=self.fit_prior)
    self.classes_ = np.unique(y.astype(int))

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
    self.estimator.fit(X, y)
    return self
def fit(self, X, Y):
    import resource
    import sklearn.svm
    import sklearn.preprocessing

    # Rough kernel-cache size (in MB) for LibSVM, with a 200 MB fallback
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            soft /= 1024 * 1024
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            cache_size = (soft - maxrss) / 1.5
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    self.epsilon = float(self.epsilon)
    self.tol = float(self.tol)
    self.shrinking = check_for_bool(self.shrinking)
    self.degree = int(self.degree)
    self.gamma = float(self.gamma)
    if check_none(self.coef0):
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.verbose = int(self.verbose)
    self.max_iter = int(self.max_iter)

    self.estimator = sklearn.svm.SVR(
        kernel=self.kernel,
        C=self.C,
        epsilon=self.epsilon,
        tol=self.tol,
        shrinking=self.shrinking,
        degree=self.degree,
        gamma=self.gamma,
        coef0=self.coef0,
        cache_size=cache_size,
        verbose=self.verbose,
        max_iter=self.max_iter)

    self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
    self.scaler.fit(Y.reshape((-1, 1)))
    Y_scaled = self.scaler.transform(Y.reshape((-1, 1))).ravel()
    self.estimator.fit(X, Y_scaled)
    return self
def fit(self, X, Y, sample_weight=None):
    from sklearn.ensemble import ExtraTreesClassifier
    from sklearn.feature_selection import SelectFromModel

    self.n_estimators = int(self.n_estimators)
    if check_none(self.max_leaf_nodes):
        self.max_leaf_nodes = None
    else:
        self.max_leaf_nodes = int(self.max_leaf_nodes)
    if check_none(self.max_depth):
        self.max_depth = None
    else:
        self.max_depth = int(self.max_depth)

    self.bootstrap = check_for_bool(self.bootstrap)
    self.n_jobs = int(self.n_jobs)
    self.min_impurity_decrease = float(self.min_impurity_decrease)
    self.min_samples_leaf = int(self.min_samples_leaf)
    self.min_samples_split = int(self.min_samples_split)
    self.verbose = int(self.verbose)

    max_features = int(X.shape[1] ** float(self.max_features))

    estimator = ExtraTreesClassifier(
        n_estimators=self.n_estimators,
        criterion=self.criterion,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        bootstrap=self.bootstrap,
        max_features=max_features,
        max_leaf_nodes=self.max_leaf_nodes,
        min_impurity_decrease=self.min_impurity_decrease,
        oob_score=self.oob_score,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
        class_weight=self.class_weight)
    estimator.fit(X, Y, sample_weight=sample_weight)

    self.preprocessor = SelectFromModel(estimator=estimator,
                                        threshold='mean',
                                        prefit=True)
    return self
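# --- Usage sketch (illustrative, not from the original source) ---
# Using a prefit SelectFromModel like the one built above: features whose
# importance falls below the mean importance are dropped at transform time.
# Data and hyperparameters are made up for the example.
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

rng = np.random.RandomState(0)
X_demo = rng.rand(100, 8)
y_demo = rng.randint(0, 2, size=100)

forest = ExtraTreesClassifier(n_estimators=50, random_state=0)
forest.fit(X_demo, y_demo)
selector = SelectFromModel(estimator=forest, threshold='mean', prefit=True)
print(selector.transform(X_demo).shape)  # (100, n_selected)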
def _fit(self, X, y=None):
    import autosklearn.pipeline.implementations.OneHotEncoder

    self.use_minimum_fraction = check_for_bool(self.use_minimum_fraction)
    if self.use_minimum_fraction is False:
        self.minimum_fraction = None
    else:
        self.minimum_fraction = float(self.minimum_fraction)

    if check_none(self.categorical_features):
        categorical_features = []
    else:
        categorical_features = self.categorical_features

    self.preprocessor = autosklearn.pipeline.implementations.OneHotEncoder \
        .OneHotEncoder(minimum_fraction=self.minimum_fraction,
                       categorical_features=categorical_features,
                       sparse=True)
    return self.preprocessor.fit_transform(X)
def iterative_fit(self, X, y, n_iter=1, refit=False):
    from sklearn.ensemble import RandomForestRegressor

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_estimators = int(self.n_estimators)
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.max_features = float(self.max_features)
        self.bootstrap = check_for_bool(self.bootstrap)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)

        # Start from an empty warm-started forest; trees are added below
        self.estimator = RandomForestRegressor(
            n_estimators=0,
            criterion=self.criterion,
            max_features=self.max_features,
            max_depth=self.max_depth,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            bootstrap=self.bootstrap,
            max_leaf_nodes=self.max_leaf_nodes,
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            warm_start=True)

    self.estimator.n_estimators += n_iter
    self.estimator.n_estimators = min(self.estimator.n_estimators,
                                      self.n_estimators)
    self.estimator.fit(X, y)
    return self
def iterative_fit(self, X, y, n_iter=1, refit=False):
    import sklearn.naive_bayes

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.n_iter = 0
        self.fully_fit_ = False
        self.fit_prior = check_for_bool(self.fit_prior)
        self.estimator = sklearn.naive_bayes.BernoulliNB(
            alpha=self.alpha, fit_prior=self.fit_prior)
        self.classes_ = np.unique(y.astype(int))

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator.n_iter = self.n_iter
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
        self.estimator.fit(X, y)
        self.fully_fit_ = True
    else:
        for _ in range(n_iter):
            start = min(self.n_iter * 1000, y.shape[0])
            stop = min((self.n_iter + 1) * 1000, y.shape[0])
            # Upper limit, scipy.sparse doesn't seem to handle
            # max > len(matrix)
            stop = min(stop, y.shape[0])
            if X[start:stop].shape[0] == 0:
                self.fully_fit_ = True
                break
            self.estimator.partial_fit(X[start:stop], y[start:stop],
                                       self.classes_)
            self.n_iter += 1
            if stop >= len(y):
                self.fully_fit_ = True
                break
    return self
def iterative_fit(self, X, y, n_iter=2, refit=False):
    """Set n_iter=2 for the same reason as for SGD."""
    from sklearn.neural_network import MLPRegressor
    import sklearn.preprocessing

    n_iter = max(n_iter, 2)

    if refit:
        self.estimator = None
        self.scaler = None

    if self.estimator is None:
        self._fully_fit = False

        self.max_iter = int(self.max_iter)
        self.hidden_layer_depth = int(self.hidden_layer_depth)
        self.num_nodes_per_layer = int(self.num_nodes_per_layer)
        self.hidden_layer_sizes = tuple(
            self.num_nodes_per_layer for i in range(self.hidden_layer_depth))
        self.activation = str(self.activation)
        self.alpha = float(self.alpha)
        self.learning_rate_init = float(self.learning_rate_init)
        self.early_stopping = str(self.early_stopping)
        if self.early_stopping == "train":
            self.validation_fraction = 0.0
            self.tol = float(self.tol)
            self.n_iter_no_change = int(self.n_iter_no_change)
            self.early_stopping_val = False
        elif self.early_stopping == "valid":
            self.validation_fraction = float(self.validation_fraction)
            self.tol = float(self.tol)
            self.n_iter_no_change = int(self.n_iter_no_change)
            self.early_stopping_val = True
        else:
            raise ValueError("Set early stopping to unknown value %s" %
                             self.early_stopping)
        # elif self.early_stopping == "off":
        #     self.validation_fraction = 0
        #     self.tol = 10000
        #     self.n_iter_no_change = self.max_iter
        #     self.early_stopping_val = False

        try:
            self.batch_size = int(self.batch_size)
        except ValueError:
            self.batch_size = str(self.batch_size)
        self.shuffle = check_for_bool(self.shuffle)
        self.beta_1 = float(self.beta_1)
        self.beta_2 = float(self.beta_2)
        self.epsilon = float(self.epsilon)
        self.verbose = int(self.verbose)

        n_iter = int(np.ceil(n_iter))

        # initial fit with only n_iter epochs
        self.estimator = MLPRegressor(
            hidden_layer_sizes=self.hidden_layer_sizes,
            activation=self.activation,
            solver=self.solver,
            alpha=self.alpha,
            batch_size=self.batch_size,
            learning_rate_init=self.learning_rate_init,
            max_iter=n_iter,
            shuffle=self.shuffle,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=True,
            early_stopping=self.early_stopping_val,
            validation_fraction=self.validation_fraction,
            n_iter_no_change=self.n_iter_no_change,
            tol=self.tol,
            beta_1=self.beta_1,
            beta_2=self.beta_2,
            epsilon=self.epsilon,
            # We do not use these, see comments below in search space
            # momentum=self.momentum,
            # nesterovs_momentum=self.nesterovs_momentum,
            # power_t=self.power_t,
            # learning_rate=self.learning_rate,
            # max_fun=self.max_fun
        )
        self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
        self.scaler.fit(y.reshape((-1, 1)))
    else:
        new_max_iter = min(self.max_iter - self.estimator.n_iter_, n_iter)
        self.estimator.max_iter = new_max_iter

    Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
    self.estimator.fit(X, Y_scaled)
    if self.estimator.n_iter_ >= self.max_iter or \
            self.estimator._no_improvement_count > self.n_iter_no_change:
        self._fully_fit = True
    return self
def fit(self, X, Y):
    import resource
    import sys
    import sklearn.svm
    import sklearn.preprocessing

    # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM.
    # The cache size is calculated as 2/3 of the available memory, which in
    # turn is the memory limit minus the memory already in use
    try:
        # Retrieve memory limits imposed on the process
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            # Convert limit to units of megabytes
            soft /= 1024 * 1024
            # Retrieve memory used by this process
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            # On MacOS, the MaxRSS output of resource.getrusage is in bytes;
            # on other platforms, it's in kilobytes
            if sys.platform == 'darwin':
                maxrss = maxrss / 1024
            cache_size = (soft - maxrss) / 1.5
            if cache_size < 0:
                cache_size = 200
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    self.epsilon = float(self.epsilon)
    self.tol = float(self.tol)
    self.shrinking = check_for_bool(self.shrinking)
    self.degree = int(self.degree)
    self.gamma = float(self.gamma)
    if check_none(self.coef0):
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.verbose = int(self.verbose)
    self.max_iter = int(self.max_iter)

    self.estimator = sklearn.svm.SVR(kernel=self.kernel,
                                     C=self.C,
                                     epsilon=self.epsilon,
                                     tol=self.tol,
                                     shrinking=self.shrinking,
                                     degree=self.degree,
                                     gamma=self.gamma,
                                     coef0=self.coef0,
                                     cache_size=cache_size,
                                     verbose=self.verbose,
                                     max_iter=self.max_iter)

    self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
    self.scaler.fit(Y.reshape((-1, 1)))
    Y_scaled = self.scaler.transform(Y.reshape((-1, 1))).ravel()
    self.estimator.fit(X, Y_scaled)
    return self
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import PassiveAggressiveClassifier

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)
        self.C = float(self.C)

        call_fit = True
        self.estimator = PassiveAggressiveClassifier(
            C=self.C,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            loss=self.loss,
            shuffle=True,
            random_state=self.random_state,
            warm_start=True,
            average=self.average,
        )
        self.classes_ = np.unique(y.astype(int))
    else:
        call_fit = False

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator.max_iter = 50
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
        self.estimator.fit(X, y)
        self.fully_fit_ = True
    else:
        if call_fit:
            self.estimator.fit(X, y)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 1000)
            self.estimator._validate_params()
            lr = "pa1" if self.estimator.loss == "hinge" else "pa2"
            self.estimator._partial_fit(
                X, y,
                alpha=1.0,
                C=self.estimator.C,
                loss="hinge",
                learning_rate=lr,
                max_iter=n_iter,
                classes=None,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None
            )

        if (
            self.estimator.max_iter >= 1000
            or n_iter > self.estimator.n_iter_
        ):
            self.fully_fit_ = True

    return self
def fit(self, X, Y):
    import resource
    import sys
    import sklearn.svm

    # Calculate the size of the kernel cache (in MB) for sklearn's LibSVM.
    # The cache size is calculated as 2/3 of the available memory, which in
    # turn is the memory limit minus the memory already in use
    try:
        # Retrieve memory limits imposed on the process
        soft, hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft > 0:
            # Convert limit to units of megabytes
            soft /= 1024 * 1024
            # Retrieve memory used by this process
            maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
            # On MacOS, the MaxRSS output of resource.getrusage is in bytes;
            # on other platforms, it's in kilobytes
            if sys.platform == 'darwin':
                maxrss = maxrss / 1024
            cache_size = (soft - maxrss) / 1.5
            if cache_size < 0:
                cache_size = 200
        else:
            cache_size = 200
    except Exception:
        cache_size = 200

    self.C = float(self.C)
    if self.degree is None:
        self.degree = 3
    else:
        self.degree = int(self.degree)
    if self.gamma is None:
        self.gamma = 0.0
    else:
        self.gamma = float(self.gamma)
    if self.coef0 is None:
        self.coef0 = 0.0
    else:
        self.coef0 = float(self.coef0)
    self.tol = float(self.tol)
    self.max_iter = int(self.max_iter)
    self.shrinking = check_for_bool(self.shrinking)

    if check_none(self.class_weight):
        self.class_weight = None

    self.estimator = sklearn.svm.SVC(C=self.C,
                                     kernel=self.kernel,
                                     degree=self.degree,
                                     gamma=self.gamma,
                                     coef0=self.coef0,
                                     shrinking=self.shrinking,
                                     tol=self.tol,
                                     class_weight=self.class_weight,
                                     max_iter=self.max_iter,
                                     random_state=self.random_state,
                                     cache_size=cache_size,
                                     decision_function_shape='ovr')
    self.estimator.fit(X, Y)
    return self
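# --- Helper sketch (illustrative, not from the original source) ---
# The cache-size heuristic above as a standalone function, assuming a Unix
# platform where the `resource` module is available: take the address-space
# limit, subtract the memory already in use, and hand roughly two thirds of
# the remainder to LibSVM, falling back to 200 MB. The function name is
# hypothetical.
import resource
import sys

def guess_libsvm_cache_size_mb(default=200):
    try:
        soft, _hard = resource.getrlimit(resource.RLIMIT_AS)
        if soft <= 0:
            return default
        soft /= 1024 * 1024  # bytes -> MB
        maxrss = resource.getrusage(resource.RUSAGE_SELF)[2] / 1024
        if sys.platform == 'darwin':  # ru_maxrss is in bytes on macOS
            maxrss /= 1024
        cache_size = (soft - maxrss) / 1.5
        return cache_size if cache_size > 0 else default
    except Exception:
        return default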
def iterative_fit(self, X, y, n_iter=2, refit=False):
    from sklearn.linear_model import SGDRegressor
    import sklearn.preprocessing

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.
    n_iter = max(n_iter, 2)

    if refit:
        self.estimator = None
        self.scaler = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)
        self.l1_ratio = float(self.l1_ratio) \
            if self.l1_ratio is not None else 0.15
        self.epsilon = float(self.epsilon) \
            if self.epsilon is not None else 0.1
        self.eta0 = float(self.eta0)
        self.power_t = float(self.power_t) \
            if self.power_t is not None else 0.25
        self.average = check_for_bool(self.average)

        self.estimator = SGDRegressor(loss=self.loss,
                                      penalty=self.penalty,
                                      alpha=self.alpha,
                                      fit_intercept=self.fit_intercept,
                                      max_iter=n_iter,
                                      tol=self.tol,
                                      learning_rate=self.learning_rate,
                                      l1_ratio=self.l1_ratio,
                                      epsilon=self.epsilon,
                                      eta0=self.eta0,
                                      power_t=self.power_t,
                                      shuffle=True,
                                      average=self.average,
                                      random_state=self.random_state,
                                      warm_start=True)
        self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
        self.scaler.fit(y.reshape((-1, 1)))
        Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
        self.estimator.fit(X, Y_scaled)
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, self.max_iter)
        Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
        self.estimator._validate_params()
        self.estimator._partial_fit(
            X, Y_scaled,
            alpha=self.estimator.alpha,
            C=1.0,
            loss=self.estimator.loss,
            learning_rate=self.estimator.learning_rate,
            max_iter=n_iter,
            sample_weight=None,
            coef_init=None,
            intercept_init=None
        )

    if self.estimator.max_iter >= self.max_iter \
            or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self
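# --- Usage sketch (illustrative, not from the original source) ---
# The target-scaling pattern above in isolation: train on standardized y,
# then map predictions back with inverse_transform. Data is made up for
# the example.
import numpy as np
import sklearn.preprocessing
from sklearn.linear_model import SGDRegressor

rng = np.random.RandomState(0)
X_demo = rng.rand(100, 3)
y_demo = rng.rand(100) * 1000.0

scaler = sklearn.preprocessing.StandardScaler(copy=True)
scaler.fit(y_demo.reshape((-1, 1)))
y_scaled = scaler.transform(y_demo.reshape((-1, 1))).ravel()

reg = SGDRegressor(max_iter=10, tol=1e-3, random_state=0)
reg.fit(X_demo, y_scaled)
y_pred = scaler.inverse_transform(
    reg.predict(X_demo).reshape((-1, 1))).ravel()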
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import SGDClassifier

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        self.l1_ratio = float(self.l1_ratio) \
            if self.l1_ratio is not None else 0.15
        self.epsilon = float(self.epsilon) \
            if self.epsilon is not None else 0.1
        self.eta0 = float(self.eta0)
        self.power_t = float(self.power_t) \
            if self.power_t is not None else 0.5
        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)

        self.estimator = SGDClassifier(loss=self.loss,
                                       penalty=self.penalty,
                                       alpha=self.alpha,
                                       fit_intercept=self.fit_intercept,
                                       max_iter=n_iter,
                                       tol=self.tol,
                                       learning_rate=self.learning_rate,
                                       l1_ratio=self.l1_ratio,
                                       epsilon=self.epsilon,
                                       eta0=self.eta0,
                                       power_t=self.power_t,
                                       shuffle=True,
                                       average=self.average,
                                       random_state=self.random_state,
                                       warm_start=True)
        self.estimator.fit(X, y, sample_weight=sample_weight)
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 512)
        self.estimator._validate_params()
        self.estimator._partial_fit(
            X, y,
            alpha=self.estimator.alpha,
            C=1.0,
            loss=self.estimator.loss,
            learning_rate=self.estimator.learning_rate,
            max_iter=n_iter,
            sample_weight=sample_weight,
            classes=None,
            coef_init=None,
            intercept_init=None
        )

    if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import SGDClassifier

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.alpha = float(self.alpha)
        self.l1_ratio = float(self.l1_ratio) \
            if self.l1_ratio is not None else 0.15
        self.epsilon = float(self.epsilon) \
            if self.epsilon is not None else 0.1
        self.eta0 = float(self.eta0)
        self.power_t = float(self.power_t) \
            if self.power_t is not None else 0.5
        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)

        self.estimator = SGDClassifier(loss=self.loss,
                                       penalty=self.penalty,
                                       alpha=self.alpha,
                                       fit_intercept=self.fit_intercept,
                                       max_iter=n_iter,
                                       tol=self.tol,
                                       learning_rate=self.learning_rate,
                                       l1_ratio=self.l1_ratio,
                                       epsilon=self.epsilon,
                                       eta0=self.eta0,
                                       power_t=self.power_t,
                                       shuffle=True,
                                       average=self.average,
                                       random_state=self.random_state,
                                       warm_start=True)
        self.estimator.fit(X, y, sample_weight=sample_weight)
    else:
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 512)
        self.estimator._validate_params()
        self.estimator._partial_fit(
            X, y,
            alpha=self.estimator.alpha,
            C=1.0,
            loss=self.estimator.loss,
            learning_rate=self.estimator.learning_rate,
            max_iter=n_iter,
            sample_weight=sample_weight,
            classes=None,
            coef_init=None,
            intercept_init=None
        )

    if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
        self.fully_fit_ = True

    return self
def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    from sklearn.linear_model import PassiveAggressiveClassifier

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends fewer
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.

    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)
        self.C = float(self.C)

        call_fit = True
        self.estimator = PassiveAggressiveClassifier(
            C=self.C,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            loss=self.loss,
            shuffle=True,
            random_state=self.random_state,
            warm_start=True,
            average=self.average,
        )
        self.classes_ = np.unique(y.astype(int))
    else:
        call_fit = False

    # Fallback for multilabel classification
    if len(y.shape) > 1 and y.shape[1] > 1:
        import sklearn.multiclass
        self.estimator.max_iter = 50
        self.estimator = sklearn.multiclass.OneVsRestClassifier(
            self.estimator, n_jobs=1)
        self.estimator.fit(X, y)
        self.fully_fit_ = True
    else:
        if call_fit:
            self.estimator.fit(X, y)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 1000)
            self.estimator._validate_params()
            lr = "pa1" if self.estimator.loss == "hinge" else "pa2"
            self.estimator._partial_fit(
                X, y,
                alpha=1.0,
                C=self.estimator.C,
                loss="hinge",
                learning_rate=lr,
                max_iter=n_iter,
                classes=None,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None
            )

        if (
            self.estimator.max_iter >= 1000
            or n_iter > self.estimator.n_iter_
        ):
            self.fully_fit_ = True

    return self