Пример #1
0
    def __init__(
            self,
            penalty='l2',  # h2o4gpu
            dual=False,
            tol=1E-2,  # h2o4gpu
            C=1.0,  # h2o4gpu
            fit_intercept=True,  # h2o4gpu
            intercept_scaling=1.0,
            class_weight=None,
            random_state=None,
            solver='liblinear',
            max_iter=5000,  # h2o4gpu
            multi_class='ovr',
            verbose=0,  # h2o4gpu
            warm_start=False,
            n_jobs=1,
            n_gpus=-1,  # h2o4gpu
            glm_stop_early=True,  # h2o4gpu
            glm_stop_early_error_fraction=1.0,
            backend='auto'):  # h2o4gpu
        """Set up a logistic regression estimator backed by sklearn or h2o4gpu.

        Both ``self.model_sklearn`` and ``self.model_h2o4gpu`` are always
        constructed; ``self.model`` points at whichever one the resolved
        backend selects, and ``self.backend`` records that choice as
        ``'sklearn'`` or ``'h2o4gpu'``.

        :param penalty: ``'l2'`` (alpha_min = alpha_max = 0.0) or ``'l1'``
            (alpha_min = alpha_max = 1.0) for the h2o4gpu model.
        :param C: Used to derive ``lambda_max = 1.0 / C`` for h2o4gpu.
        :param verbose: When truthy, prints which backend is selected and
            why sklearn was chosen in ``'auto'`` mode.
        :param backend: ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``. The
            ``H2O4GPU_BACKEND`` environment variable, when set, overrides
            this argument. In ``'auto'`` mode sklearn is used if any of
            ``intercept_scaling``, ``class_weight``, ``solver`` or
            ``multi_class`` differs from its default. Any other value
            selects h2o4gpu.
        :raises ValueError: If ``penalty`` is neither ``'l1'`` nor ``'l2'``.

        The remaining parameters are forwarded unchanged to
        ``sk.LogisticRegressionSklearn`` and/or
        ``elastic_net.ElasticNetH2O`` as shown in the calls below.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend

        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False
        if backend == 'auto':
            # (name, given value, sklearn default) for every parameter that
            # only the sklearn implementation honours.
            sklearn_only = (
                ('intercept_scaling', intercept_scaling, 1.0),
                ('class_weight', class_weight, None),
                ('solver', solver, 'liblinear'),
                ('multi_class', multi_class, 'ovr'),
            )
            for name, value, default in sklearn_only:
                if value != default:
                    self.do_sklearn = True
                    if verbose:
                        print("WARNING:"
                              " The sklearn parameter " + name +
                              " has been changed from default to " + str(value)
                              + "  Will run Sklearn Logistic Regression.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'h2o4gpu':
            self.do_sklearn = False
        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

        self.model_sklearn = sk.LogisticRegressionSklearn(
            penalty=penalty,
            dual=dual,
            tol=tol,
            C=C,
            fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling,
            class_weight=class_weight,
            random_state=random_state,
            solver=solver,
            max_iter=max_iter,
            multi_class=multi_class,
            verbose=verbose,
            warm_start=warm_start,
            n_jobs=n_jobs)

        # Equivalent Logistic parameters for h2o4gpu
        n_threads = None
        n_alphas = 1
        n_lambdas = 1
        n_folds = 1
        lambda_max = 1.0 / C
        lambda_min_ratio = 1.0
        lambda_stop_early = False
        store_full_path = 0
        alphas = None
        lambdas = None

        # Utilize penalty parameter to setup alphas
        if penalty == 'l2':
            alpha_min = 0.0
            alpha_max = 0.0
        elif penalty == 'l1':
            alpha_min = 1.0
            alpha_max = 1.0
        else:
            # The previous `assert ValueError, ...` was always true and let
            # execution continue with alpha_min/alpha_max undefined.
            raise ValueError("penalty should be either l1 "
                             "or l2 but got " + str(penalty))

        self.model_h2o4gpu = elastic_net.ElasticNetH2O(
            n_threads=n_threads,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=lambda_min_ratio,
            n_lambdas=n_lambdas,
            n_folds=n_folds,
            n_alphas=n_alphas,
            tol=tol,
            lambda_stop_early=lambda_stop_early,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            family='logistic',
            store_full_path=store_full_path,
            lambda_max=lambda_max,
            alpha_max=alpha_max,
            alpha_min=alpha_min,
            alphas=alphas,
            lambdas=lambdas,
            order=None)

        if self.do_sklearn:
            if verbose:
                print("Running sklearn Logistic Regression")
            self.model = self.model_sklearn
        else:
            if verbose:
                print("Running h2o4gpu Logistic Regression")
            self.model = self.model_h2o4gpu
        self.verbose = verbose
Пример #2
0
    def __init__(
            self,
            alpha=1.0,  #h2o4gpu
            fit_intercept=True,  #h2o4gpu
            normalize=False,
            precompute=False,
            copy_X=True,
            max_iter=5000,  #h2o4gpu
            tol=1e-2,  #h2o4gpu
            warm_start=False,
            positive=False,
            random_state=None,
            selection='cyclic',
            n_gpus=-1,  # h2o4gpu
            glm_stop_early=True,  # h2o4gpu
            glm_stop_early_error_fraction=1.0,  #h2o4gpu
            verbose=False,
            backend='auto'):  # h2o4gpu
        """Set up a lasso estimator backed by sklearn or h2o4gpu.

        Both ``self.model_sklearn`` and ``self.model_h2o4gpu`` are built;
        ``self.model`` refers to the one chosen, and ``self.backend`` is
        set to ``'sklearn'`` or ``'h2o4gpu'`` accordingly.

        :param alpha: Passed to sklearn as ``alpha`` and to h2o4gpu as
            ``lambda_max``.
        :param verbose: When truthy, prints the selected backend and any
            reason for falling back to sklearn.
        :param backend: ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``; the
            ``H2O4GPU_BACKEND`` environment variable takes precedence when
            set. In ``'auto'`` mode sklearn is used if ``normalize``,
            ``positive`` or ``selection`` differs from its default.
        """
        import os
        env_backend = os.environ.get('H2O4GPU_BACKEND', None)
        if env_backend is not None:
            backend = env_backend
        assert_is_type(backend, str)

        # sklearn is chosen explicitly, or implicitly in 'auto' mode when a
        # parameter that only sklearn handles has a non-default value.
        self.do_sklearn = backend == 'sklearn'
        if backend == 'auto':
            sklearn_only = zip(
                ('normalize', 'positive', 'selection'),
                (normalize, positive, selection),
                (False, False, 'cyclic'))
            for name, given, default in sklearn_only:
                if given != default:
                    self.do_sklearn = True
                    if verbose:
                        print("WARNING: The sklearn parameter " + name +
                              " has been changed from default to " +
                              str(given) +
                              ". Will run Sklearn Lasso Regression.")

        self.backend = 'sklearn' if self.do_sklearn else 'h2o4gpu'

        self.model_sklearn = sk.LassoSklearn(
            alpha=alpha,
            fit_intercept=fit_intercept,
            normalize=normalize,
            precompute=precompute,
            copy_X=copy_X,
            max_iter=max_iter,
            tol=tol,
            warm_start=warm_start,
            positive=positive,
            random_state=random_state,
            selection=selection)

        # Lasso expressed as an elastic net: a single lambda equal to
        # ``alpha`` and the alpha range pinned to 1.0.
        self.model_h2o4gpu = elastic_net.ElasticNetH2O(
            n_threads=None,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=1.0,
            n_lambdas=1,
            n_folds=1,
            n_alphas=1,
            tol=tol,
            lambda_stop_early=False,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            store_full_path=1,
            lambda_max=alpha,
            alpha_max=1.0,
            alpha_min=1.0,
            alphas=None,
            lambdas=None,
            order=None)

        if self.do_sklearn:
            chosen, label = self.model_sklearn, "sklearn"
        else:
            chosen, label = self.model_h2o4gpu, "h2o4gpu"
        if verbose:
            print("Running " + label + " Lasso Regression")
        self.model = chosen
        self.verbose = verbose
Пример #3
0
    def __init__(
            self,
            alpha=1.0,  #h2o4gpu
            fit_intercept=True,  #h2o4gpu
            normalize=False,
            copy_X=True,
            max_iter=5000,  #h2o4gpu
            tol=1e-2,  #h2o4gpu
            solver='auto',
            random_state=None,
            n_gpus=-1,  # h2o4gpu
            glm_stop_early=True,  # h2o4gpu
            glm_stop_early_error_fraction=1.0,  #h2o4gpu
            verbose=False,
            backend='auto',
            **kwargs):  # h2o4gpu
        """Set up a ridge estimator backed by sklearn, h2o4gpu or DAAL.

        ``self.model_sklearn`` and ``self.model_h2o4gpu`` are always built;
        ``self.model_daal`` is built only when the DAAL backend is selected
        and supported. ``self.model`` refers to the chosen model and
        ``self.backend`` is always set to ``'sklearn'``, ``'daal'`` or
        ``'h2o4gpu'``.

        :param alpha: Passed to sklearn/DAAL as ``alpha`` and to h2o4gpu as
            ``lambda_max``.
        :param verbose: When truthy, prints the selected backend and any
            reason for falling back to sklearn in ``'auto'`` mode.
        :param backend: ``'auto'``, ``'sklearn'``, ``'h2o4gpu'`` or
            ``'daal'``; the ``H2O4GPU_BACKEND`` environment variable takes
            precedence when set. In ``'auto'`` mode sklearn is used if
            ``normalize`` or ``solver`` differs from its default. If
            ``'daal'`` is requested but ``DAAL_SUPPORTED`` is false, a
            warning is printed and sklearn is used. Any other value selects
            h2o4gpu.
        :param kwargs: Extra keyword arguments forwarded only to the DAAL
            ``RidgeRegression`` constructor.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend

        self.do_daal = False
        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False
        if backend == 'auto':
            # (name, given value, sklearn default) for parameters that only
            # the sklearn implementation honours.
            sklearn_only = (
                ('normalize', normalize, False),
                ('solver', solver, 'auto'),
            )
            for name, value, default in sklearn_only:
                if value != default:
                    self.do_sklearn = True
                    if verbose:
                        print("WARNING:"
                              " The sklearn parameter " + name +
                              " has been changed from default to " + str(value)
                              + ". Will run Sklearn Ridge Regression.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'daal':
            from h2o4gpu import DAAL_SUPPORTED
            if DAAL_SUPPORTED:
                from h2o4gpu.solvers.daal_solver.regression \
                        import RidgeRegression as DRR
                self.do_daal = True

                self.model_daal = DRR(alpha=alpha,
                                      fit_intercept=fit_intercept,
                                      normalize=normalize,
                                      **kwargs)
            else:
                import platform
                print("WARNING: "
                      "DAAL is supported only for x86_64, "
                      "architecture detected {}. Sklearn model "
                      "used instead".format(platform.architecture()))
                self.do_sklearn = True

        # Record the backend actually in use. Deriving it from the flags
        # guarantees the attribute exists for every input (it used to be
        # missing in 'auto' mode) and matches the selected model (the DAAL
        # fallback used to report 'h2o4gpu' while running sklearn).
        if self.do_sklearn:
            self.backend = 'sklearn'
        elif self.do_daal:
            self.backend = 'daal'
        else:
            self.backend = 'h2o4gpu'

        self.model_sklearn = sk.RidgeSklearn(
            alpha=alpha,
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy_X=copy_X,
            max_iter=max_iter,
            tol=tol,
            solver=solver,
            random_state=random_state)

        # Equivalent Ridge parameters for h2o4gpu
        n_threads = None
        n_alphas = 1
        n_lambdas = 1
        n_folds = 1
        lambda_max = alpha
        lambda_min_ratio = 1.0
        lambda_stop_early = False
        store_full_path = 1
        alphas = None
        lambdas = None
        alpha_min = 0.0
        alpha_max = 0.0

        self.model_h2o4gpu = elastic_net.ElasticNetH2O(
            n_threads=n_threads,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=lambda_min_ratio,
            n_lambdas=n_lambdas,
            n_folds=n_folds,
            n_alphas=n_alphas,
            tol=tol,
            lambda_stop_early=lambda_stop_early,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            store_full_path=store_full_path,
            lambda_max=lambda_max,
            alpha_max=alpha_max,
            alpha_min=alpha_min,
            alphas=alphas,
            lambdas=lambdas,
            order=None)

        if self.do_sklearn:
            if verbose:
                print("Running sklearn Ridge Regression")
            self.model = self.model_sklearn
        elif self.do_daal:
            if verbose:
                print("Running PyDAAL Ridge Regression")
            self.model = self.model_daal
        else:
            if verbose:
                print("Running h2o4gpu Ridge Regression")
            self.model = self.model_h2o4gpu
        self.verbose = verbose
Пример #4
0
    def __init__(
            self,
            fit_intercept=True,  #h2o4gpu
            normalize=False,
            copy_X=True,
            n_jobs=1,
            n_gpus=-1,
            tol=1E-4,
            glm_stop_early=True,  # h2o4gpu
            glm_stop_early_error_fraction=1.0,  # h2o4gpu
            verbose=False,
            backend='auto',
            **kwargs):
        """Set up a linear regression backed by sklearn, h2o4gpu or DAAL.

        ``self.model_sklearn`` and ``self.model_h2o4gpu`` are always built;
        ``self.model_daal`` is built only when the DAAL backend is selected
        and supported. ``self.model`` refers to the chosen model and
        ``self.backend`` is always set to ``'sklearn'``, ``'daal'`` or
        ``'h2o4gpu'``.

        :param verbose: When truthy, prints the selected backend and any
            reason for falling back to sklearn in ``'auto'`` mode.
        :param backend: ``'auto'``, ``'sklearn'``, ``'h2o4gpu'`` or
            ``'daal'``; the ``H2O4GPU_BACKEND`` environment variable takes
            precedence when set. In ``'auto'`` mode sklearn is used if
            ``normalize`` differs from its default. If ``'daal'`` is
            requested but ``DAAL_SUPPORTED`` is false, a warning is printed
            and sklearn is used. Any other value selects h2o4gpu.
        :param kwargs: Extra keyword arguments forwarded only to the DAAL
            ``LinearRegression`` constructor.

        The h2o4gpu model is an ``ElasticNetH2O`` with ``lambda_max``,
        ``alpha_min`` and ``alpha_max`` all 0.0 and ``max_iter`` fixed at
        5000.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend

        self.do_daal = False
        # Fall back to Sklearn
        # Can remove if fully implement sklearn functionality
        self.do_sklearn = False

        if backend == 'auto':
            # (name, given value, sklearn default) for parameters that only
            # the sklearn implementation honours.
            sklearn_only = (
                ('normalize', normalize, False),
            )
            for name, value, default in sklearn_only:
                if value != default:
                    self.do_sklearn = True
                    if verbose:
                        print("WARNING:"
                              " The sklearn parameter " + name +
                              " has been changed from default to " +
                              str(value) +
                              ". Will run Sklearn Linear Regression.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        elif backend == 'daal':
            from h2o4gpu import DAAL_SUPPORTED
            if DAAL_SUPPORTED:
                from h2o4gpu.solvers.daal_solver.regression \
                    import LinearRegression as DLR
                self.do_daal = True

                self.model_daal = DLR(fit_intercept=fit_intercept,
                                      normalize=normalize,
                                      **kwargs)
            else:
                import platform
                print("WARNING: "
                      "DAAL is supported only for x86_64, "
                      "architecture detected {}. Sklearn model "
                      "used instead".format(platform.architecture()))
                self.do_sklearn = True

        # Record the backend actually in use. Deriving it from the flags
        # guarantees the attribute exists for every input; it used to be
        # left unset in 'auto' mode and for unrecognised backend names.
        if self.do_sklearn:
            self.backend = 'sklearn'
        elif self.do_daal:
            self.backend = 'daal'
        else:
            self.backend = 'h2o4gpu'

        self.model_sklearn = sk.LinearRegressionSklearn(
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy_X=copy_X,
            n_jobs=n_jobs)

        # Equivalent Linear Regression parameters for h2o4gpu
        n_threads = None
        lambda_min_ratio = 0.0
        n_lambdas = 1
        n_folds = 1
        n_alphas = 1
        tol_seek_factor = 1E-1
        lambda_stop_early = False
        max_iter = 5000
        family = 'elasticnet'
        lambda_max = 0.0
        alpha_max = 0.0
        alpha_min = 0.0
        alphas = None
        lambdas = None

        self.model_h2o4gpu = elastic_net.ElasticNetH2O(
            n_threads=n_threads,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=lambda_min_ratio,
            n_lambdas=n_lambdas,
            n_folds=n_folds,
            n_alphas=n_alphas,
            tol=tol,
            lambda_stop_early=lambda_stop_early,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            lambda_max=lambda_max,
            alpha_max=alpha_max,
            alpha_min=alpha_min,
            alphas=alphas,
            lambdas=lambdas,
            tol_seek_factor=tol_seek_factor,
            family=family,
            order=None)

        if self.do_sklearn:
            if verbose:
                print("Running sklearn Linear Regression")
            self.model = self.model_sklearn
        elif self.do_daal:
            if verbose:
                print("Running PyDAAL Linear Regression")
            self.model = self.model_daal
        else:
            if verbose:
                print("Running h2o4gpu Linear Regression")
            self.model = self.model_h2o4gpu
        self.verbose = verbose
Пример #5
0
def fit_model(X_train, y_train, X_test, y_test, reg_type='enet'):
    """Fit an h2o4gpu and an sklearn regressor and compare them.

    Both models are fitted on the training data and timed, then scored
    with ``r2_score`` on the test data. The h2o4gpu model's resources are
    released before returning.

    :param X_train: Training features passed to both ``fit`` calls.
    :param y_train: Training targets passed to both ``fit`` calls.
    :param X_test: Test features passed to both ``predict`` calls.
    :param y_test: Test targets used for the R^2 scores.
    :param reg_type: ``'lasso'``, ``'ridge'`` or ``'enet'``.
    :returns: ``(time_h2o, time_sklearn, r2_h2o, r2_sklearn)`` where the
        times are the wall-clock durations of the two ``fit`` calls as
        measured with ``time.time()``.
    :raises ValueError: If ``reg_type`` is not one of the supported values.
    """
    if reg_type == 'lasso':
        # Lasso expressed as an elastic net: a single lambda equal to
        # alpha and the alpha range pinned to 1.0.
        alpha = 1.0
        reg_h2o = elastic_net.ElasticNetH2O(
            n_threads=None,
            n_gpus=-1,
            fit_intercept=True,
            lambda_min_ratio=1.0,
            n_lambdas=1,
            n_folds=1,
            n_alphas=1,
            tol=1e-2,
            lambda_stop_early=False,
            glm_stop_early=True,
            glm_stop_early_error_fraction=1.0,
            max_iter=5000,
            verbose=False,
            store_full_path=1,
            lambda_max=alpha,
            alpha_max=1.0,
            alpha_min=1.0,
            alphas=None,
            lambdas=None,
            order=None)

        reg_sklearn = linear_model.LassoSklearn()
    elif reg_type == 'ridge':
        reg_h2o = h2o4gpu.Ridge()
        reg_sklearn = linear_model.RidgeSklearn()
    elif reg_type == 'enet':
        reg_h2o = h2o4gpu.ElasticNet()  # update when the wrapper is done
        reg_sklearn = linear_model.ElasticNetSklearn()
    else:
        # Without this branch an unknown type fell through and failed later
        # with an unhelpful "referenced before assignment" error.
        raise ValueError(
            "reg_type should be one of 'lasso', 'ridge' or 'enet' "
            "but got " + repr(reg_type))

    start_h2o = time.time()
    reg_h2o.fit(X_train, y_train, free_input_data=1)
    time_h2o = time.time() - start_h2o

    start_sklearn = time.time()
    reg_sklearn.fit(X_train, y_train)
    time_sklearn = time.time() - start_sklearn

    # Predicting test values
    y_pred_h2o = reg_h2o.predict(X_test, free_input_data=1)
    y_pred_h2o = y_pred_h2o.squeeze()

    y_pred_sklearn = reg_sklearn.predict(X_test)

    # Calculating R^2 scores
    r2_h2o = r2_score(y_test, y_pred_h2o)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Clearing the memory
    reg_h2o.free_sols()
    reg_h2o.free_preds()
    reg_h2o.finish()
    del reg_h2o
    del reg_sklearn
    gc.collect()

    return time_h2o, time_sklearn, r2_h2o, r2_sklearn
Пример #6
0
    def __init__(
            self,
            fit_intercept=True,  #h2o4gpu
            normalize=False,
            copy_X=True,
            n_jobs=1,
            n_gpus=-1,
            tol=1E-4,
            glm_stop_early=True,  # h2o4gpu
            glm_stop_early_error_fraction=1.0,  # h2o4gpu
            verbose=False,
            backend='auto'):
        """Set up a linear regression backed by sklearn or h2o4gpu.

        Both ``self.model_sklearn`` and ``self.model_h2o4gpu`` are built;
        ``self.model`` refers to the one chosen. ``self.backend`` stores the
        requested backend string (after the environment override), not the
        resolved one.

        :param verbose: When truthy, prints the selected backend and any
            reason for falling back to sklearn in ``'auto'`` mode.
        :param backend: ``'auto'``, ``'sklearn'`` or ``'h2o4gpu'``; the
            ``H2O4GPU_BACKEND`` environment variable takes precedence when
            set. In ``'auto'`` mode sklearn is used if ``normalize``,
            ``copy_X`` or ``n_jobs`` differs from its default. Any other
            value selects h2o4gpu.

        The h2o4gpu model is an ``ElasticNetH2O`` with ``lambda_max``,
        ``alpha_min`` and ``alpha_max`` all 0.0 and ``max_iter`` fixed at
        5000.
        """
        import os
        _backend = os.environ.get('H2O4GPU_BACKEND', None)
        if _backend is not None:
            backend = _backend
        assert_is_type(backend, str)

        # Initialise up front so the flag exists for every backend value;
        # it used to be set only inside the recognised branches, so e.g.
        # H2O4GPU_BACKEND=daal crashed with AttributeError further down.
        self.do_sklearn = False
        if backend == 'auto':
            # Fall back to Sklearn
            # Can remove if fully implement sklearn functionality
            sklearn_only = (
                ('normalize', normalize, False),
                ('copy_X', copy_X, True),
                ('n_jobs', n_jobs, 1),
            )
            for name, value, default in sklearn_only:
                if value != default:
                    self.do_sklearn = True
                    # Honour the verbose flag instead of always printing.
                    if verbose:
                        print("WARNING: The sklearn parameter " +
                              name +
                              " has been changed from default to " +
                              str(value) +
                              ". Will run Sklearn Linear Regression.")
        elif backend == 'sklearn':
            self.do_sklearn = True
        self.backend = backend

        self.model_sklearn = sk.LinearRegressionSklearn(
            fit_intercept=fit_intercept,
            normalize=normalize,
            copy_X=copy_X,
            n_jobs=n_jobs)

        # Equivalent Linear Regression parameters for h2o4gpu
        n_threads = None
        lambda_min_ratio = 0.0
        n_lambdas = 1
        n_folds = 1
        n_alphas = 1
        tol_seek_factor = 1E-1
        lambda_stop_early = False
        max_iter = 5000
        family = 'elasticnet'
        lambda_max = 0.0
        alpha_max = 0.0
        alpha_min = 0.0
        alphas = None
        lambdas = None

        self.model_h2o4gpu = elastic_net.ElasticNetH2O(
            n_threads=n_threads,
            n_gpus=n_gpus,
            fit_intercept=fit_intercept,
            lambda_min_ratio=lambda_min_ratio,
            n_lambdas=n_lambdas,
            n_folds=n_folds,
            n_alphas=n_alphas,
            tol=tol,
            lambda_stop_early=lambda_stop_early,
            glm_stop_early=glm_stop_early,
            glm_stop_early_error_fraction=glm_stop_early_error_fraction,
            max_iter=max_iter,
            verbose=verbose,
            lambda_max=lambda_max,
            alpha_max=alpha_max,
            alpha_min=alpha_min,
            alphas=alphas,
            lambdas=lambdas,
            tol_seek_factor=tol_seek_factor,
            family=family,
            order=None)

        if self.do_sklearn:
            if verbose:
                print("Running sklearn Linear Regression")
            self.model = self.model_sklearn
        else:
            if verbose:
                print("Running h2o4gpu Linear Regression")
            self.model = self.model_h2o4gpu