def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
    """Fit the wrapped ``PassiveAggressiveRegressor`` for ``n_iter`` more iterations.

    On the first call (or when ``refit=True``) a fresh warm-started estimator
    is created and ``fit`` is called; subsequent calls extend ``max_iter``
    (hard-capped at 1000) and continue training through sklearn's private
    ``_partial_fit``.  ``self.fully_fit_`` is set once the cap is reached or
    the estimator stopped before spending its full iteration budget.

    :param X: training data, forwarded unchanged to the sklearn estimator
    :param y: training targets
    :param n_iter: number of additional iterations to grant in this call
    :param refit: when True, discard the current estimator and start over
    :param sample_weight: optional per-sample weights (used only on the
        incremental ``_partial_fit`` path)
    :return: self
    """
    # NOTE(review): ``sklearn.linear_model.passive_aggressive`` is a private
    # module path that newer scikit-learn releases removed; on modern
    # versions this must be ``from sklearn.linear_model import
    # PassiveAggressiveRegressor`` — confirm against the pinned version.
    from sklearn.linear_model.passive_aggressive import \
        PassiveAggressiveRegressor

    # Need to fit at least two iterations, otherwise early stopping will not
    # work because we cannot determine whether the algorithm actually
    # converged. The only way of finding this out is if the sgd spends less
    # iterations than max_iter. If max_iter == 1, it has to spend at least
    # one iteration and will always spend at least one iteration, so we
    # cannot know about convergence.
    if refit:
        self.estimator = None

    if self.estimator is None:
        self.fully_fit_ = False

        # Hyperparameters may arrive as strings from the configuration
        # space; normalise them before handing them to sklearn.
        self.average = check_for_bool(self.average)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.tol = float(self.tol)
        self.C = float(self.C)

        call_fit = True
        # warm_start=True lets later calls keep training the same model.
        self.estimator = PassiveAggressiveRegressor(
            C=self.C,
            fit_intercept=self.fit_intercept,
            max_iter=n_iter,
            tol=self.tol,
            loss=self.loss,
            shuffle=True,
            random_state=self.random_state,
            warm_start=True,
            average=self.average,
        )
    else:
        call_fit = False

    if call_fit:
        self.estimator.fit(X, y)
    else:
        # Incremental path: extend the iteration budget (capped at 1000)
        # and continue optimisation via sklearn's private ``_partial_fit``.
        self.estimator.max_iter += n_iter
        self.estimator.max_iter = min(self.estimator.max_iter, 1000)
        self.estimator._validate_params()
        # PA-I vs PA-II is selected through ``learning_rate`` ("pa1"/"pa2");
        # the ``loss`` argument below stays hard-coded, which appears to
        # mirror sklearn's own regressor ``fit`` — confirm against the
        # pinned sklearn version.
        lr = "pa1" if self.estimator.loss == "epsilon_insensitive" else "pa2"
        self.estimator._partial_fit(
            X, y,
            alpha=1.0,
            C=self.estimator.C,
            loss="epsilon_insensitive",
            learning_rate=lr,
            max_iter=n_iter,
            sample_weight=sample_weight,
            coef_init=None,
            intercept_init=None
        )
        # Done when we hit the cap, or when sklearn reports it used fewer
        # iterations than allowed this round (i.e. it converged early).
        if self.estimator.max_iter >= 1000 or n_iter > self.estimator.n_iter_:
            self.fully_fit_ = True

    return self
class PassiveAggressiveRegressorImpl():
    """Thin wrapper that stores constructor arguments and delegates
    fitting/prediction to an ``SKLModel`` built lazily in :meth:`fit`."""

    def __init__(self, C=1.0, fit_intercept=True, max_iter=None, tol=None,
                 early_stopping=False, validation_fraction=0.1,
                 n_iter_no_change=5, shuffle=True, verbose=0,
                 loss='epsilon_insensitive', epsilon=0.1, random_state=None,
                 warm_start=False, average=False, n_iter=None):
        # Keep every argument in a dict so it can be splatted straight
        # into the underlying estimator's constructor later.
        self._hyperparams = dict(
            C=C,
            fit_intercept=fit_intercept,
            max_iter=max_iter,
            tol=tol,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            shuffle=shuffle,
            verbose=verbose,
            loss=loss,
            epsilon=epsilon,
            random_state=random_state,
            warm_start=warm_start,
            average=average,
            n_iter=n_iter,
        )

    def fit(self, X, y=None):
        """Build a fresh underlying model from the stored hyperparameters
        and fit it; ``y`` is forwarded only when provided."""
        self._sklearn_model = SKLModel(**self._hyperparams)
        fit_args = (X,) if y is None else (X, y)
        self._sklearn_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Delegate prediction to the fitted underlying model."""
        return self._sklearn_model.predict(X)
def __init__(self, C=1.0, fit_intercept=True, max_iter=None, tol=None,
             early_stopping=False, validation_fraction=0.1,
             n_iter_no_change=5, shuffle=True, verbose=0,
             loss='epsilon_insensitive', epsilon=0.1, random_state=None,
             warm_start=False, average=False, n_iter=None):
    """Record all hyperparameters and construct the wrapped operator.

    Every constructor argument is collected into ``self._hyperparams`` and
    then splatted into ``Op`` to build ``self._wrapped_model``.
    """
    self._hyperparams = dict(
        C=C,
        fit_intercept=fit_intercept,
        max_iter=max_iter,
        tol=tol,
        early_stopping=early_stopping,
        validation_fraction=validation_fraction,
        n_iter_no_change=n_iter_no_change,
        shuffle=shuffle,
        verbose=verbose,
        loss=loss,
        epsilon=epsilon,
        random_state=random_state,
        warm_start=warm_start,
        average=average,
        n_iter=n_iter,
    )
    # Build the wrapped operator immediately from the recorded settings.
    self._wrapped_model = Op(**self._hyperparams)
def fit(self, X, y=None):
    """Create a fresh ``SKLModel`` from the stored hyperparameters and fit
    it on ``X`` (and ``y`` when supplied); returns ``self`` for chaining."""
    self._sklearn_model = SKLModel(**self._hyperparams)
    fit_args = (X,) if y is None else (X, y)
    self._sklearn_model.fit(*fit_args)
    return self
"Extra Tree", "SVR" ] classifiers = [ RandomForestRegressor(n_estimators=200, n_jobs=5, random_state=randomstate), ExtraTreesRegressor(n_estimators=200, n_jobs=5, random_state=randomstate), # GradientBoostingRegressor(random_state=randomstate), # learning_rate is a hyper-parameter in the range (0.0, 1.0] # HistGradientBoostingClassifier(random_state=randomstate), # learning_rate is a hyper-parameter in the range (0.0, 1.0] AdaBoostRegressor(n_estimators=200, random_state=randomstate), GaussianProcessRegressor(normalize_y=True), ARDRegression(), # HuberRegressor(), # epsilon: greater than 1.0, default 1.35 LinearRegression(n_jobs=5), PassiveAggressiveRegressor( random_state=randomstate), # C: 0.25, 0.5, 1, 5, 10 SGDRegressor(random_state=randomstate), TheilSenRegressor(n_jobs=5, random_state=randomstate), RANSACRegressor(random_state=randomstate), KNeighborsRegressor( weights='distance'), # n_neighbors: 3, 6, 9, 12, 15, 20 RadiusNeighborsRegressor(weights='distance'), # radius: 1, 2, 5, 10, 15 MLPRegressor(max_iter=10000000, random_state=randomstate), DecisionTreeRegressor( random_state=randomstate), # max_depth = 2, 3, 4, 6, 8 ExtraTreeRegressor(random_state=randomstate), # max_depth = 2, 3, 4, 6, 8 SVR() # C: 0.25, 0.5, 1, 5, 10 ] selectors = [ reliefF.reliefF,
'NearestCentroid':NearestCentroid(), 'NearestNeighbors':NearestNeighbors(), 'Normalizer':Normalizer(), 'NuSVC':NuSVC(), 'NuSVR':NuSVR(), 'Nystroem':Nystroem(), 'OAS':OAS(), 'OneClassSVM':OneClassSVM(), 'OrthogonalMatchingPursuit':OrthogonalMatchingPursuit(), 'OrthogonalMatchingPursuitCV':OrthogonalMatchingPursuitCV(), 'PCA':PCA(), 'PLSCanonical':PLSCanonical(), 'PLSRegression':PLSRegression(), 'PLSSVD':PLSSVD(), 'PassiveAggressiveClassifier':PassiveAggressiveClassifier(), 'PassiveAggressiveRegressor':PassiveAggressiveRegressor(), 'Perceptron':Perceptron(), 'ProjectedGradientNMF':ProjectedGradientNMF(), 'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(), 'RANSACRegressor':RANSACRegressor(), 'RBFSampler':RBFSampler(), 'RadiusNeighborsClassifier':RadiusNeighborsClassifier(), 'RadiusNeighborsRegressor':RadiusNeighborsRegressor(), 'RandomForestClassifier':RandomForestClassifier(), 'RandomForestRegressor':RandomForestRegressor(), 'RandomizedLasso':RandomizedLasso(), 'RandomizedLogisticRegression':RandomizedLogisticRegression(), 'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(),
class PassiveAggressive(
    IterativeComponentWithSampleWeight,
    BaseRegressionModel,
):
    """Auto-sklearn style regression component wrapping sklearn's
    ``PassiveAggressiveRegressor`` with iterative (budgeted) fitting."""

    def __init__(self, C, fit_intercept, tol, loss, average, random_state=None):
        # Raw hyperparameters; normalised (bool/float coercion) lazily in
        # iterative_fit, since they may arrive as strings from the config space.
        self.C = C
        self.fit_intercept = fit_intercept
        self.average = average
        self.tol = tol
        self.loss = loss
        self.random_state = random_state
        # Underlying sklearn estimator; created on first iterative_fit call.
        self.estimator = None
        self.start_time = time.time()
        self.time_limit = None

    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        """Fit the wrapped estimator for ``n_iter`` more iterations.

        First call (or ``refit=True``) builds a fresh warm-started estimator
        and calls ``fit``; later calls bump ``max_iter`` (capped at 1000) and
        continue training through sklearn's private ``_partial_fit``.  Sets
        ``self.fully_fit_`` once the cap is reached or the estimator stopped
        before using its full budget.

        :return: self
        """
        # NOTE(review): this private module path was removed in newer
        # scikit-learn releases; modern versions import the class from
        # ``sklearn.linear_model`` directly — confirm pinned version.
        from sklearn.linear_model.passive_aggressive import \
            PassiveAggressiveRegressor

        # Need to fit at least two iterations, otherwise early stopping will not
        # work because we cannot determine whether the algorithm actually
        # converged. The only way of finding this out is if the sgd spends less
        # iterations than max_iter. If max_iter == 1, it has to spend at least
        # one iteration and will always spend at least one iteration, so we
        # cannot know about convergence.
        if refit:
            self.estimator = None

        if self.estimator is None:
            self.fully_fit_ = False

            # Coerce config-space values to their proper Python types.
            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.C = float(self.C)

            call_fit = True
            # warm_start=True so later calls keep training the same model.
            self.estimator = PassiveAggressiveRegressor(
                C=self.C,
                fit_intercept=self.fit_intercept,
                max_iter=n_iter,
                tol=self.tol,
                loss=self.loss,
                shuffle=True,
                random_state=self.random_state,
                warm_start=True,
                average=self.average,
            )
        else:
            call_fit = False

        if call_fit:
            self.estimator.fit(X, y)
        else:
            # Incremental path: extend the iteration budget (capped at 1000)
            # and continue via sklearn's private ``_partial_fit``.
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 1000)
            self.estimator._validate_params()
            # PA-I vs PA-II is chosen via learning_rate ("pa1"/"pa2"); the
            # hard-coded ``loss`` below appears to mirror sklearn's own
            # regressor fit — confirm against the pinned sklearn version.
            lr = "pa1" if self.estimator.loss == "epsilon_insensitive" else "pa2"
            self.estimator._partial_fit(
                X, y,
                alpha=1.0,
                C=self.estimator.C,
                loss="epsilon_insensitive",
                learning_rate=lr,
                max_iter=n_iter,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None
            )
            # Done when we hit the cap, or sklearn used fewer iterations
            # than granted this round (early convergence).
            if self.estimator.max_iter >= 1000 or n_iter > self.estimator.n_iter_:
                self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return True once iterative fitting has finished (see
        ``fully_fit_`` flag maintained by :meth:`iterative_fit`)."""
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        """Predict with the fitted estimator; raises if never fitted."""
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        # Static capability descriptor consumed by the surrounding framework.
        return {'shortname': 'PassiveAggressive Regressor',
                'name': 'Passive Aggressive Regressor',
                'handles_regression': True,
                'handles_classification': False,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'is_deterministic': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,)}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the ConfigSpace search space for this component
        (C, fit_intercept, loss, tol, average)."""
        C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        loss = CategoricalHyperparameter(
            "loss",
            ["epsilon_insensitive", "squared_epsilon_insensitive"],
            default_value="epsilon_insensitive"
        )
        tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1,
                                         default_value=1e-4, log=True)
        # Note: Average could also be an Integer if > 1
        average = CategoricalHyperparameter('average', ['False', 'True'],
                                            default_value='False')
        cs = ConfigurationSpace()
        cs.add_hyperparameters([loss, fit_intercept, tol, C, average])
        return cs