def __init__(
        self,
        loss='deviance',
        learning_rate=0.1,  # h2o4gpu
        n_estimators=100,  # h2o4gpu
        subsample=1.0,  # h2o4gpu
        criterion='friedman_mse',
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=3,  # h2o4gpu
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        init=None,
        random_state=None,  # h2o4gpu
        max_features='auto',
        verbose=0,  # h2o4gpu
        max_leaf_nodes=None,
        warm_start=False,
        presort='auto',
        # XGBoost specific params
        colsample_bytree=1.0,  # h2o4gpu
        num_parallel_tree=100,  # h2o4gpu
        tree_method='gpu_hist',  # h2o4gpu
        n_gpus=-1,  # h2o4gpu
        predictor='gpu_predictor',  # h2o4gpu
        backend='auto'):  # h2o4gpu
    """Build both the sklearn and the xgboost model and pick the active one.

    Parameters marked ``# h2o4gpu`` in the signature are the ones forwarded
    to ``xgb.XGBClassifier``; the sklearn model receives every parameter up
    to and including ``presort``.

    :param backend: 'auto', 'sklearn' or 'h2o4gpu'. The environment
        variable ``H2O4GPU_BACKEND``, when set, overrides this argument.
        With 'auto', the sklearn model is selected as soon as any
        sklearn-only parameter differs from its default; otherwise the
        xgboost model is selected. Any other string selects the xgboost
        model. The (possibly overridden) string is stored as
        ``self.backend``.
    :param verbose: values > 0 additionally print which parameter forced
        the sklearn fallback in 'auto' mode.
    :param random_state: passed unchanged to the sklearn model; ``None``
        is replaced by 0 for the xgboost model only.

    Sets ``self.do_sklearn``, ``self.backend``, ``self.model_sklearn``,
    ``self.model_h2o4gpu`` and ``self.model`` (the selected one).
    """
    import os
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend
    from ..typecheck.typechecks import assert_is_type
    assert_is_type(backend, str)

    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False
    if backend == 'auto':
        # (name, value passed by the caller, default from the signature)
        # for every parameter that only the sklearn model receives.
        # NOTE: the criterion default must read 'friedman_mse' (underscore)
        # to match the signature, otherwise 'auto' always picks sklearn.
        sklearn_only_params = [
            ('loss', loss, 'deviance'),
            ('criterion', criterion, 'friedman_mse'),
            ('min_samples_split', min_samples_split, 2),
            ('min_samples_leaf', min_samples_leaf, 1),
            ('min_weight_fraction_leaf', min_weight_fraction_leaf, 0.0),
            ('min_impurity_decrease', min_impurity_decrease, 0.0),
            ('min_impurity_split', min_impurity_split, None),
            ('init', init, None),
            ('max_features', max_features, 'auto'),
            ('max_leaf_nodes', max_leaf_nodes, None),
            ('presort', presort, 'auto'),
        ]
        for name, value, default in sklearn_only_params:
            if value != default:
                self.do_sklearn = True
                if verbose > 0:
                    print("WARNING: The sklearn parameter " + name +
                          " has been changed from default to " +
                          str(value) +
                          ". Will run Sklearn GradientBoostingClassifier.")
    elif backend == 'sklearn':
        self.do_sklearn = True
    elif backend == 'h2o4gpu':
        self.do_sklearn = False
    self.backend = backend

    from h2o4gpu.ensemble import GradientBoostingClassifierSklearn
    self.model_sklearn = GradientBoostingClassifierSklearn(
        loss=loss,
        learning_rate=learning_rate,  # h2o4gpu
        n_estimators=n_estimators,  # h2o4gpu
        subsample=subsample,  # h2o4gpu
        criterion=criterion,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_depth=max_depth,  # h2o4gpu
        min_impurity_decrease=min_impurity_decrease,
        min_impurity_split=min_impurity_split,
        init=init,
        random_state=random_state,  # h2o4gpu
        max_features=max_features,
        verbose=verbose,  # h2o4gpu
        max_leaf_nodes=max_leaf_nodes,
        warm_start=warm_start,
        presort=presort)  # h2o4gpu

    # Parameters for gbm
    # xgboost's `silent` flag is the inverse of a sklearn-style `verbose`:
    # stay quiet only when no verbosity was requested.
    silent = verbose == 0
    # Only the xgboost model gets the 0 substitute; the sklearn model above
    # was already built with the caller's original random_state.
    if random_state is None:
        random_state = 0

    import xgboost as xgb
    self.model_h2o4gpu = xgb.XGBClassifier(
        learning_rate=learning_rate,  # h2o4gpu
        n_estimators=n_estimators,  # h2o4gpu
        subsample=subsample,  # h2o4gpu
        max_depth=max_depth,  # h2o4gpu
        random_state=random_state,  # h2o4gpu
        silent=silent,  # h2o4gpu
        colsample_bytree=colsample_bytree,  # h2o4gpu
        num_parallel_tree=num_parallel_tree,  # h2o4gpu
        tree_method=tree_method,  # h2o4gpu
        n_gpus=n_gpus,  # h2o4gpu
        predictor=predictor,  # h2o4gpu
        backend=backend)  # h2o4gpu

    if self.do_sklearn:
        print("Running sklearn GradientBoostingClassifier")
        self.model = self.model_sklearn
    else:
        print("Running h2o4gpu GradientBoostingClassifier")
        self.model = self.model_h2o4gpu
class GradientBoostingClassifier(object):
    """H2O GradientBoostingClassifier Solver

    Wraps two models built from the same constructor arguments and routes
    calls to one of them:

    * ``h2o4gpu.ensemble.GradientBoostingClassifierSklearn``
      (``self.model_sklearn``)
    * ``xgboost.XGBClassifier`` (``self.model_h2o4gpu``)

    ``self.model`` is whichever of the two was selected and
    ``self.do_sklearn`` tells which. Only ``self.model`` is trained by
    :meth:`fit`; methods that always delegate to ``self.model_sklearn``
    print a warning saying so.

    :param backend: Which backend to use.
        Options are 'auto', 'sklearn', 'h2o4gpu'.
        Default is 'auto'.
        The requested value (after the ``H2O4GPU_BACKEND`` environment
        override) is saved as the ``backend`` attribute.
    """

    def __init__(
            self,
            loss='deviance',
            learning_rate=0.1,  # h2o4gpu
            n_estimators=100,  # h2o4gpu
            subsample=1.0,  # h2o4gpu
            criterion='friedman_mse',
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.0,
            max_depth=3,  # h2o4gpu
            min_impurity_decrease=0.0,
            min_impurity_split=None,
            init=None,
            random_state=None,  # h2o4gpu
            max_features='auto',
            verbose=0,  # h2o4gpu
            max_leaf_nodes=None,
            warm_start=False,
            presort='auto',
            # XGBoost specific params
            colsample_bytree=1.0,  # h2o4gpu
            num_parallel_tree=100,  # h2o4gpu
            tree_method='gpu_hist',  # h2o4gpu
            n_gpus=-1,  # h2o4gpu
            predictor='gpu_predictor',  # h2o4gpu
            backend='auto'):  # h2o4gpu
        """Build both the sklearn and the xgboost model and pick one.

        Parameters marked ``# h2o4gpu`` in the signature are the ones
        forwarded to ``xgb.XGBClassifier``; the sklearn model receives
        every parameter up to and including ``presort``.

        :param backend: 'auto', 'sklearn' or 'h2o4gpu'. The environment
            variable ``H2O4GPU_BACKEND``, when set, overrides this
            argument. With 'auto', the sklearn model is selected as soon
            as any sklearn-only parameter differs from its default;
            otherwise the xgboost model is selected. Any other string
            selects the xgboost model.
        :param verbose: values > 0 additionally print which parameter
            forced the sklearn fallback in 'auto' mode.
        :param random_state: passed unchanged to the sklearn model;
            ``None`` is replaced by 0 for the xgboost model only.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend
        from ..typecheck.typechecks import assert_is_type
        assert_is_type(backend, str)

        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False
        if backend == 'auto':
            # (name, value passed by the caller, default from the
            # signature) for every parameter only the sklearn model gets.
            # NOTE: the criterion default must read 'friedman_mse'
            # (underscore) to match the signature, otherwise 'auto'
            # always picks sklearn.
            sklearn_only_params = [
                ('loss', loss, 'deviance'),
                ('criterion', criterion, 'friedman_mse'),
                ('min_samples_split', min_samples_split, 2),
                ('min_samples_leaf', min_samples_leaf, 1),
                ('min_weight_fraction_leaf', min_weight_fraction_leaf, 0.0),
                ('min_impurity_decrease', min_impurity_decrease, 0.0),
                ('min_impurity_split', min_impurity_split, None),
                ('init', init, None),
                ('max_features', max_features, 'auto'),
                ('max_leaf_nodes', max_leaf_nodes, None),
                ('presort', presort, 'auto'),
            ]
            for name, value, default in sklearn_only_params:
                if value != default:
                    self.do_sklearn = True
                    if verbose > 0:
                        print(
                            "WARNING: The sklearn parameter " + name +
                            " has been changed from default to " +
                            str(value) +
                            ". Will run Sklearn GradientBoostingClassifier.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'h2o4gpu':
            self.do_sklearn = False
        self.backend = backend

        from h2o4gpu.ensemble import GradientBoostingClassifierSklearn
        self.model_sklearn = GradientBoostingClassifierSklearn(
            loss=loss,
            learning_rate=learning_rate,  # h2o4gpu
            n_estimators=n_estimators,  # h2o4gpu
            subsample=subsample,  # h2o4gpu
            criterion=criterion,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_depth=max_depth,  # h2o4gpu
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            init=init,
            random_state=random_state,  # h2o4gpu
            max_features=max_features,
            verbose=verbose,  # h2o4gpu
            max_leaf_nodes=max_leaf_nodes,
            warm_start=warm_start,
            presort=presort)  # h2o4gpu

        # Parameters for gbm
        # xgboost's `silent` flag is the inverse of a sklearn-style
        # `verbose`: stay quiet only when no verbosity was requested.
        silent = verbose == 0
        # Only the xgboost model gets the 0 substitute; the sklearn model
        # above was already built with the caller's original random_state.
        if random_state is None:
            random_state = 0

        import xgboost as xgb
        self.model_h2o4gpu = xgb.XGBClassifier(
            learning_rate=learning_rate,  # h2o4gpu
            n_estimators=n_estimators,  # h2o4gpu
            subsample=subsample,  # h2o4gpu
            max_depth=max_depth,  # h2o4gpu
            random_state=random_state,  # h2o4gpu
            silent=silent,  # h2o4gpu
            colsample_bytree=colsample_bytree,  # h2o4gpu
            num_parallel_tree=num_parallel_tree,  # h2o4gpu
            tree_method=tree_method,  # h2o4gpu
            n_gpus=n_gpus,  # h2o4gpu
            predictor=predictor,  # h2o4gpu
            backend=backend)  # h2o4gpu

        if self.do_sklearn:
            print("Running sklearn GradientBoostingClassifier")
            self.model = self.model_sklearn
        else:
            print("Running h2o4gpu GradientBoostingClassifier")
            self.model = self.model_h2o4gpu

    def apply(self, X):
        """Delegate ``apply`` to the sklearn model, whatever the backend."""
        print("WARNING: apply() is using sklearn")
        return self.model_sklearn.apply(X)

    def decision_function(self, X):
        """Delegate ``decision_function`` to the sklearn model."""
        print("WARNING: decision_function() is using sklearn")
        return self.model_sklearn.decision_function(X)

    def fit(self, X, y=None, sample_weight=None):
        """Fit the selected model, then mirror its fitted attributes.

        :returns: whatever the selected model's ``fit`` returns.
        """
        res = self.model.fit(X, y, sample_weight)
        self.set_attributes()
        return res

    def get_params(self, deep=True):
        """Return the selected model's parameters.

        :param deep: forwarded to the selected model's ``get_params``.
        """
        return self.model.get_params(deep=deep)

    def predict(self, X):
        """Predict with the selected model.

        The sklearn result is returned untouched. The xgboost result is
        thresholded in place at 0.5 (values <= 0.5 become 0, values > 0.5
        become 1) and squeezed.
        """
        res = self.model.predict(X)
        if not self.do_sklearn:
            # `<=` so that a value of exactly 0.5 is also mapped to a class
            # instead of leaking through unthresholded.
            res[res <= 0.5] = 0
            res[res > 0.5] = 1
            res = res.squeeze()
        self.set_attributes()
        return res

    def predict_log_proba(self, X):
        """Return the element-wise natural log of :meth:`predict_proba`."""
        import numpy as np
        res = self.predict_proba(X)
        self.set_attributes()
        return np.log(res)

    def predict_proba(self, X):
        """Return the selected model's ``predict_proba`` output."""
        # Both models expose predict_proba; calling predict() here would
        # hand back predictions rather than probabilities.
        res = self.model.predict_proba(X)
        self.set_attributes()
        return res

    def score(self, X, y, sample_weight=None):
        """Return the (optionally weighted) accuracy on ``X``, ``y``.

        With the sklearn backend this is the sklearn model's ``score``.
        With the xgboost backend the accuracy is computed from
        :meth:`predict`, so the model that was actually fitted is the one
        being scored and nothing is re-trained on the evaluation data.
        """
        if self.do_sklearn:
            print("WARNING: score() is using sklearn")
            return self.model_sklearn.score(X, y, sample_weight)
        import numpy as np
        hits = np.ravel(self.predict(X)) == np.ravel(y)
        return float(np.average(hits, weights=sample_weight))

    def set_params(self, **params):
        """Forward ``params`` to the selected model's ``set_params``."""
        return self.model.set_params(**params)

    def staged_decision_function(self, X):
        """Delegate ``staged_decision_function`` to the sklearn model."""
        print("WARNING: staged_decision_function() is using sklearn")
        return self.model_sklearn.staged_decision_function(X)

    def staged_predict(self, X):
        """Delegate ``staged_predict`` to the sklearn model."""
        print("WARNING: staged_predict() is using sklearn")
        return self.model_sklearn.staged_predict(X)

    def staged_predict_proba(self, X):
        """Delegate ``staged_predict_proba`` to the sklearn model."""
        print("WARNING: staged_predict_proba() is using sklearn")
        return self.model_sklearn.staged_predict_proba(X)

    def set_attributes(self):
        """ Set attributes for class

        Copies fitted attributes from the selected model onto this
        wrapper through the project's ``_setter`` helper, which is
        constructed with ``NameError`` and ``AttributeError``.
        NOTE(review): presumably the helper swallows those two exceptions
        when the selected model lacks an attribute -- confirm in
        ``solvers.utils``.
        """
        from ..solvers.utils import _setter
        s = _setter(oself=self, e1=NameError, e2=AttributeError)

        s('oself.feature_importances_ = oself.model.feature_importances_')
        s('oself.oob_improvement_ = oself.model.oob_improvement_')
        s('oself.train_score_ = oself.model.train_score_')
        s('oself.loss_ = oself.model.loss_')
        s('oself.init = oself.model.init')
        s('oself.estimators_ = oself.model.estimators_')
def __init__(
        self,
        loss='deviance',
        learning_rate=0.1,  # h2o4gpu
        n_estimators=100,  # h2o4gpu
        subsample=1.0,  # h2o4gpu
        criterion='friedman_mse',
        min_samples_split=2,
        min_samples_leaf=1,
        min_weight_fraction_leaf=0.0,
        max_depth=3,  # h2o4gpu
        min_impurity_decrease=0.0,
        min_impurity_split=None,
        init=None,
        random_state=None,  # h2o4gpu
        max_features='auto',
        verbose=0,  # h2o4gpu
        max_leaf_nodes=None,
        warm_start=False,
        presort='auto',
        # XGBoost specific params
        colsample_bytree=1.0,  # h2o4gpu
        num_parallel_tree=100,  # h2o4gpu
        tree_method='gpu_hist',  # h2o4gpu
        n_gpus=-1,  # h2o4gpu
        predictor='gpu_predictor',  # h2o4gpu
        backend='auto'):  # h2o4gpu
    """Create the sklearn and xgboost estimators and select the active one.

    Arguments tagged ``# h2o4gpu`` in the signature are those handed to
    ``xgb.XGBClassifier``; everything up to and including ``presort`` is
    handed to ``GradientBoostingClassifierSklearn``.

    :param backend: 'auto', 'sklearn' or 'h2o4gpu'. If the environment
        variable ``H2O4GPU_BACKEND`` is set it replaces this argument.
        'auto' selects sklearn when at least one sklearn-only argument is
        not at its default and xgboost otherwise; an unrecognised string
        selects xgboost. The resulting string is kept in ``self.backend``.
    :param verbose: when > 0, 'auto' mode prints each argument that
        triggered the sklearn fallback.
    :param random_state: given as-is to the sklearn estimator; the
        xgboost estimator receives 0 in place of ``None``.

    Attributes set: ``do_sklearn``, ``backend``, ``model_sklearn``,
    ``model_h2o4gpu`` and ``model`` (the selected estimator).
    """
    import os
    _backend = os.environ.get('H2O4GPU_BACKEND', None)
    if _backend is not None:
        backend = _backend
    from ..typecheck.typechecks import assert_is_type
    assert_is_type(backend, str)

    # Fall back to Sklearn
    # Can remove if fully implement sklearn functionality
    self.do_sklearn = False
    if backend == 'auto':
        # One (name, supplied value, signature default) entry per argument
        # that only the sklearn estimator understands.
        # NOTE: 'friedman_mse' must be spelled with an underscore, exactly
        # like the signature default; a mismatch forces sklearn every time.
        sklearn_only_params = [
            ('loss', loss, 'deviance'),
            ('criterion', criterion, 'friedman_mse'),
            ('min_samples_split', min_samples_split, 2),
            ('min_samples_leaf', min_samples_leaf, 1),
            ('min_weight_fraction_leaf', min_weight_fraction_leaf, 0.0),
            ('min_impurity_decrease', min_impurity_decrease, 0.0),
            ('min_impurity_split', min_impurity_split, None),
            ('init', init, None),
            ('max_features', max_features, 'auto'),
            ('max_leaf_nodes', max_leaf_nodes, None),
            ('presort', presort, 'auto'),
        ]
        for name, value, default in sklearn_only_params:
            if value != default:
                self.do_sklearn = True
                if verbose > 0:
                    print(
                        "WARNING: The sklearn parameter " + name +
                        " has been changed from default to " + str(value) +
                        ". Will run Sklearn GradientBoostingClassifier.")
    elif backend == 'sklearn':
        self.do_sklearn = True
    elif backend == 'h2o4gpu':
        self.do_sklearn = False
    self.backend = backend

    from h2o4gpu.ensemble import GradientBoostingClassifierSklearn
    self.model_sklearn = GradientBoostingClassifierSklearn(
        loss=loss,
        learning_rate=learning_rate,  # h2o4gpu
        n_estimators=n_estimators,  # h2o4gpu
        subsample=subsample,  # h2o4gpu
        criterion=criterion,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        min_weight_fraction_leaf=min_weight_fraction_leaf,
        max_depth=max_depth,  # h2o4gpu
        min_impurity_decrease=min_impurity_decrease,
        min_impurity_split=min_impurity_split,
        init=init,
        random_state=random_state,  # h2o4gpu
        max_features=max_features,
        verbose=verbose,  # h2o4gpu
        max_leaf_nodes=max_leaf_nodes,
        warm_start=warm_start,
        presort=presort)  # h2o4gpu

    # Parameters for gbm
    # `silent` means "suppress xgboost output", i.e. the opposite of a
    # non-zero sklearn-style `verbose`.
    silent = verbose == 0
    # The sklearn estimator above keeps the caller's random_state; only
    # xgboost gets the 0 substitute for None.
    if random_state is None:
        random_state = 0

    import xgboost as xgb
    self.model_h2o4gpu = xgb.XGBClassifier(
        learning_rate=learning_rate,  # h2o4gpu
        n_estimators=n_estimators,  # h2o4gpu
        subsample=subsample,  # h2o4gpu
        max_depth=max_depth,  # h2o4gpu
        random_state=random_state,  # h2o4gpu
        silent=silent,  # h2o4gpu
        colsample_bytree=colsample_bytree,  # h2o4gpu
        num_parallel_tree=num_parallel_tree,  # h2o4gpu
        tree_method=tree_method,  # h2o4gpu
        n_gpus=n_gpus,  # h2o4gpu
        predictor=predictor,  # h2o4gpu
        backend=backend)  # h2o4gpu

    if self.do_sklearn:
        print("Running sklearn GradientBoostingClassifier")
        self.model = self.model_sklearn
    else:
        print("Running h2o4gpu GradientBoostingClassifier")
        self.model = self.model_h2o4gpu
def test_gbm_classifier_backupsklearn(backend='auto'):
    """Compare the h2o4gpu GradientBoostingClassifier wrapper with sklearn.

    Fits the wrapper (with the requested ``backend``) and a plain
    ``GradientBoostingClassifierSklearn`` on ./open_data/creditcard.csv,
    using the last column as the target. Outputs and fitted attributes are
    compared only when ``backend == "sklearn"``; for any other backend the
    test just checks that construction and fitting succeed.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    import h2o4gpu
    Solver = h2o4gpu.GradientBoostingClassifier

    # Run h2o4gpu version of GradientBoostingClassifier.
    # n_jobs is deliberately not passed: it is not a parameter of the
    # wrapper's constructor.
    gbm = Solver(backend=backend, random_state=1234, n_gpus=1)
    print("h2o4gpu fit()")
    gbm.fit(X, y)

    # Run Sklearn version of GradientBoostingClassifier
    from h2o4gpu.ensemble import GradientBoostingClassifierSklearn
    gbm_sk = GradientBoostingClassifierSklearn(random_state=1234, max_depth=3)
    print("Scikit fit()")
    gbm_sk.fit(X, y)

    if backend == "sklearn":
        # Same estimator, same seed: results are expected to match exactly.
        assert (gbm.predict(X) == gbm_sk.predict(X)).all()
        assert (gbm.predict_log_proba(X) ==
                gbm_sk.predict_log_proba(X)).all()
        assert (gbm.predict_proba(X) == gbm_sk.predict_proba(X)).all()
        assert gbm.score(X, y) == gbm_sk.score(X, y)
        assert (gbm.decision_function(X)[1] ==
                gbm_sk.decision_function(X)[1]).all()
        assert np.allclose(
            list(gbm.staged_predict(X)), list(gbm_sk.staged_predict(X)))
        assert np.allclose(
            list(gbm.staged_predict_proba(X)),
            list(gbm_sk.staged_predict_proba(X)))
        assert (gbm.apply(X) == gbm_sk.apply(X)).all()

        print("Estimators")
        print(gbm.estimators_)
        print(gbm_sk.estimators_)

        print("loss")
        print(gbm.loss_)
        print(gbm_sk.loss_)
        assert gbm.loss_.__dict__ == gbm_sk.loss_.__dict__

        print("init_")
        print(gbm.init)
        print(gbm_sk.init)

        print("Feature importance")
        print(gbm.feature_importances_)
        print(gbm_sk.feature_importances_)
        assert (gbm.feature_importances_ ==
                gbm_sk.feature_importances_).all()

        print("train_score_")
        print(gbm.train_score_)
        print(gbm_sk.train_score_)
        assert (gbm.train_score_ == gbm_sk.train_score_).all()
class GradientBoostingClassifier(object):
    """H2O GradientBoostingClassifier Solver

    Holds two estimators created from one set of constructor arguments:

    * ``self.model_sklearn``:
      ``h2o4gpu.ensemble.GradientBoostingClassifierSklearn``
    * ``self.model_h2o4gpu``: ``xgboost.XGBClassifier``

    ``self.model`` refers to the selected estimator and
    ``self.do_sklearn`` records whether that is the sklearn one.
    :meth:`fit` trains ``self.model`` only; the methods that always go to
    ``self.model_sklearn`` announce it with a printed warning.

    :param backend: Which backend to use.
        Options are 'auto', 'sklearn', 'h2o4gpu'.
        Default is 'auto'.
        The requested value (after the ``H2O4GPU_BACKEND`` environment
        override) is saved as the ``backend`` attribute.
    """

    def __init__(
            self,
            loss='deviance',
            learning_rate=0.1,  # h2o4gpu
            n_estimators=100,  # h2o4gpu
            subsample=1.0,  # h2o4gpu
            criterion='friedman_mse',
            min_samples_split=2,
            min_samples_leaf=1,
            min_weight_fraction_leaf=0.0,
            max_depth=3,  # h2o4gpu
            min_impurity_decrease=0.0,
            min_impurity_split=None,
            init=None,
            random_state=None,  # h2o4gpu
            max_features='auto',
            verbose=0,  # h2o4gpu
            max_leaf_nodes=None,
            warm_start=False,
            presort='auto',
            # XGBoost specific params
            colsample_bytree=1.0,  # h2o4gpu
            num_parallel_tree=100,  # h2o4gpu
            tree_method='gpu_hist',  # h2o4gpu
            n_gpus=-1,  # h2o4gpu
            predictor='gpu_predictor',  # h2o4gpu
            backend='auto'):  # h2o4gpu
        """Create the sklearn and xgboost estimators and select one.

        Arguments tagged ``# h2o4gpu`` in the signature are those handed
        to ``xgb.XGBClassifier``; everything up to and including
        ``presort`` is handed to ``GradientBoostingClassifierSklearn``.

        :param backend: 'auto', 'sklearn' or 'h2o4gpu'. If the
            environment variable ``H2O4GPU_BACKEND`` is set it replaces
            this argument. 'auto' selects sklearn when at least one
            sklearn-only argument is not at its default and xgboost
            otherwise; an unrecognised string selects xgboost.
        :param verbose: when > 0, 'auto' mode prints each argument that
            triggered the sklearn fallback.
        :param random_state: given as-is to the sklearn estimator; the
            xgboost estimator receives 0 in place of ``None``.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend
        from ..typecheck.typechecks import assert_is_type
        assert_is_type(backend, str)

        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False
        if backend == 'auto':
            # One (name, supplied value, signature default) entry per
            # argument that only the sklearn estimator understands.
            # NOTE: 'friedman_mse' must be spelled with an underscore,
            # exactly like the signature default; a mismatch forces
            # sklearn every time.
            sklearn_only_params = [
                ('loss', loss, 'deviance'),
                ('criterion', criterion, 'friedman_mse'),
                ('min_samples_split', min_samples_split, 2),
                ('min_samples_leaf', min_samples_leaf, 1),
                ('min_weight_fraction_leaf', min_weight_fraction_leaf, 0.0),
                ('min_impurity_decrease', min_impurity_decrease, 0.0),
                ('min_impurity_split', min_impurity_split, None),
                ('init', init, None),
                ('max_features', max_features, 'auto'),
                ('max_leaf_nodes', max_leaf_nodes, None),
                ('presort', presort, 'auto'),
            ]
            for name, value, default in sklearn_only_params:
                if value != default:
                    self.do_sklearn = True
                    if verbose > 0:
                        print(
                            "WARNING: The sklearn parameter " + name +
                            " has been changed from default to " +
                            str(value) +
                            ". Will run Sklearn GradientBoostingClassifier.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'h2o4gpu':
            self.do_sklearn = False
        self.backend = backend

        from h2o4gpu.ensemble import GradientBoostingClassifierSklearn
        self.model_sklearn = GradientBoostingClassifierSklearn(
            loss=loss,
            learning_rate=learning_rate,  # h2o4gpu
            n_estimators=n_estimators,  # h2o4gpu
            subsample=subsample,  # h2o4gpu
            criterion=criterion,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_depth=max_depth,  # h2o4gpu
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            init=init,
            random_state=random_state,  # h2o4gpu
            max_features=max_features,
            verbose=verbose,  # h2o4gpu
            max_leaf_nodes=max_leaf_nodes,
            warm_start=warm_start,
            presort=presort)  # h2o4gpu

        # Parameters for gbm
        # `silent` means "suppress xgboost output", i.e. the opposite of a
        # non-zero sklearn-style `verbose`.
        silent = verbose == 0
        # The sklearn estimator above keeps the caller's random_state;
        # only xgboost gets the 0 substitute for None.
        if random_state is None:
            random_state = 0

        import xgboost as xgb
        self.model_h2o4gpu = xgb.XGBClassifier(
            learning_rate=learning_rate,  # h2o4gpu
            n_estimators=n_estimators,  # h2o4gpu
            subsample=subsample,  # h2o4gpu
            max_depth=max_depth,  # h2o4gpu
            random_state=random_state,  # h2o4gpu
            silent=silent,  # h2o4gpu
            colsample_bytree=colsample_bytree,  # h2o4gpu
            num_parallel_tree=num_parallel_tree,  # h2o4gpu
            tree_method=tree_method,  # h2o4gpu
            n_gpus=n_gpus,  # h2o4gpu
            predictor=predictor,  # h2o4gpu
            backend=backend)  # h2o4gpu

        if self.do_sklearn:
            print("Running sklearn GradientBoostingClassifier")
            self.model = self.model_sklearn
        else:
            print("Running h2o4gpu GradientBoostingClassifier")
            self.model = self.model_h2o4gpu

    def apply(self, X):
        """Call ``apply`` on the sklearn estimator (always sklearn)."""
        print("WARNING: apply() is using sklearn")
        return self.model_sklearn.apply(X)

    def decision_function(self, X):
        """Call ``decision_function`` on the sklearn estimator."""
        print("WARNING: decision_function() is using sklearn")
        return self.model_sklearn.decision_function(X)

    def fit(self, X, y=None, sample_weight=None):
        """Train the selected estimator and refresh mirrored attributes.

        :returns: the return value of the selected estimator's ``fit``.
        """
        res = self.model.fit(X, y, sample_weight)
        self.set_attributes()
        return res

    def get_params(self, deep=True):
        """Return the parameters of the selected estimator.

        :param deep: passed through to the estimator's ``get_params``.
        """
        return self.model.get_params(deep=deep)

    def predict(self, X):
        """Predict with the selected estimator.

        sklearn output is returned as produced. xgboost output is
        thresholded in place at 0.5 (<= 0.5 -> 0, > 0.5 -> 1) and then
        squeezed.
        """
        res = self.model.predict(X)
        if not self.do_sklearn:
            # Inclusive lower branch: a value of exactly 0.5 must not be
            # left unthresholded.
            res[res <= 0.5] = 0
            res[res > 0.5] = 1
            res = res.squeeze()
        self.set_attributes()
        return res

    def predict_log_proba(self, X):
        """Return ``numpy.log`` applied to :meth:`predict_proba`."""
        import numpy as np
        res = self.predict_proba(X)
        self.set_attributes()
        return np.log(res)

    def predict_proba(self, X):
        """Return ``predict_proba`` of the selected estimator."""
        # Use predict_proba for both backends; predict() would return
        # predictions instead of probabilities.
        res = self.model.predict_proba(X)
        self.set_attributes()
        return res

    def score(self, X, y, sample_weight=None):
        """Return the (optionally weighted) accuracy on ``X``, ``y``.

        sklearn backend: the sklearn estimator's own ``score``.
        xgboost backend: accuracy of :meth:`predict` against ``y``, so the
        estimator that was trained is the one evaluated and no re-fit on
        the evaluation data takes place.
        """
        if self.do_sklearn:
            print("WARNING: score() is using sklearn")
            return self.model_sklearn.score(X, y, sample_weight)
        import numpy as np
        hits = np.ravel(self.predict(X)) == np.ravel(y)
        return float(np.average(hits, weights=sample_weight))

    def set_params(self, **params):
        """Pass ``params`` to the selected estimator's ``set_params``."""
        return self.model.set_params(**params)

    def staged_decision_function(self, X):
        """Call ``staged_decision_function`` on the sklearn estimator."""
        print("WARNING: staged_decision_function() is using sklearn")
        return self.model_sklearn.staged_decision_function(X)

    def staged_predict(self, X):
        """Call ``staged_predict`` on the sklearn estimator."""
        print("WARNING: staged_predict() is using sklearn")
        return self.model_sklearn.staged_predict(X)

    def staged_predict_proba(self, X):
        """Call ``staged_predict_proba`` on the sklearn estimator."""
        print("WARNING: staged_predict_proba() is using sklearn")
        return self.model_sklearn.staged_predict_proba(X)

    def set_attributes(self):
        """ Set attributes for class

        Mirrors fitted attributes of the selected estimator onto this
        object via the project's ``_setter`` helper, created with
        ``NameError`` and ``AttributeError``.
        NOTE(review): presumably those two exceptions are ignored by the
        helper when an attribute is missing -- confirm in
        ``solvers.utils``.
        """
        from ..solvers.utils import _setter
        s = _setter(oself=self, e1=NameError, e2=AttributeError)

        s('oself.feature_importances_ = oself.model.feature_importances_')
        s('oself.oob_improvement_ = oself.model.oob_improvement_')
        s('oself.train_score_ = oself.model.train_score_')
        s('oself.loss_ = oself.model.loss_')
        s('oself.init = oself.model.init')
        s('oself.estimators_ = oself.model.estimators_')