Exemplo n.º 1
0
def get_automl(project_name):
    """
    Retrieve information about an AutoML instance.

    :param str project_name:  A string indicating the project_name of the automl instance to retrieve.
    :returns: A dictionary with the keys ``project_name``, ``leader`` and ``leaderboard``. ``leader`` is
        ``None`` when the leaderboard lists no models.
    """
    automl_json = h2o.api("GET /99/AutoML/%s" % project_name)
    project_name = automl_json["project_name"]
    leaderboard_list = [key["name"] for key in automl_json['leaderboard']['models']]

    # The first leaderboard entry is treated as the leader. With an empty leaderboard there is no
    # model id to look up, so skip the h2o.get_model() call instead of passing it None.
    if leaderboard_list:
        leader = h2o.get_model(leaderboard_list[0])
    else:
        leader = None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # The finally clause reverts to the user's original progress setting whether or not the conversion
    # fails; any error propagates unchanged with its original traceback.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard = h2o.H2OFrame(
            automl_json["leaderboard_table"].cell_values,
            column_names=automl_json["leaderboard_table"].col_header)
    finally:
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): the [1:] slice is preserved from the original; confirm what it is meant to drop.
    leaderboard = leaderboard[1:]
    return {'project_name': project_name, "leader": leader, "leaderboard": leaderboard}
Exemplo n.º 2
0
    def _fetch(self):
        """
        Refresh the cached leader id and leaderboard of this AutoML run.

        Calls ``GET /99/AutoML/<self._automl_key>`` through ``h2o.api``, stores the name of the first
        leaderboard model in ``self._leader_id`` (``None`` when no models are listed) and stores the
        leaderboard frame in ``self._leaderboard``.

        :returns: ``True`` if a leader model id was found, ``False`` otherwise.
        """
        res = h2o.api("GET /99/AutoML/" + self._automl_key)
        leaderboard_list = [key["name"] for key in res['leaderboard']['models']]

        # A list comprehension never yields None, so a truthiness test covers the empty case.
        self._leader_id = leaderboard_list[0] if leaderboard_list else None

        # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
        # The finally clause reverts to the user's original progress setting whether or not the conversion
        # fails; any error propagates unchanged with its original traceback.
        is_progress = H2OJob.__PROGRESS_BAR__
        h2o.no_progress()
        try:
            # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
            leaderboard = h2o.H2OFrame(
                res["leaderboard_table"].cell_values,
                column_names=res["leaderboard_table"].col_header)
        finally:
            if is_progress is True:
                h2o.show_progress()

        # NOTE(review): the [1:] slice is preserved from the original; confirm what it is meant to drop.
        self._leaderboard = leaderboard[1:]
        return self._leader_id is not None
Exemplo n.º 3
0
    def _fetch(self):
        """
        Refresh the cached leader id and leaderboard of this AutoML run.

        Calls ``GET /99/AutoML/<self.project_name>`` through ``h2o.api``, stores the name of the first
        leaderboard model in ``self._leader_id`` (``None`` when no models are listed) and stores the
        leaderboard frame in ``self._leaderboard``.

        :returns: ``True`` if a leader model id was found, ``False`` otherwise.
        """
        res = h2o.api("GET /99/AutoML/" + self.project_name)
        leaderboard_list = [key["name"] for key in res['leaderboard']['models']]

        # A list comprehension never yields None, so a truthiness test covers the empty case.
        self._leader_id = leaderboard_list[0] if leaderboard_list else None

        # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
        # The finally clause reverts to the user's original progress setting whether or not the conversion
        # fails; any error propagates unchanged with its original traceback.
        is_progress = H2OJob.__PROGRESS_BAR__
        h2o.no_progress()
        try:
            # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
            leaderboard = h2o.H2OFrame(
                res["leaderboard_table"].cell_values,
                column_names=res["leaderboard_table"].col_header)
        finally:
            if is_progress is True:
                h2o.show_progress()

        # NOTE(review): the [1:] slice is preserved from the original; confirm what it is meant to drop.
        self._leaderboard = leaderboard[1:]
        return self._leader_id is not None
Exemplo n.º 4
0
def h2oshow_progress():
    """
    Python API test: h2o.show_progress()

    Trains a small GLM with stdout redirected into a buffer and checks that the captured text contains
    the word "progress" and "100%", which is how a progress bar is displayed.

    If the run raises an AttributeError (the original comments attribute this to Python 2), the test
    instead checks that the error message mentions "encoding" and that h2o.show_progress() takes no
    arguments. Any other exception propagates unchanged.
    """
    captured = StringIO()
    saved_stdout = sys.stdout  # restore whatever stream was active, which may not be sys.__stdout__
    py2_error = None
    sys.stdout = captured  # redirect output
    try:
        h2o.show_progress()  # true by default.
        training_data = h2o.upload_file(
            pyunit_utils.locate("smalldata/logreg/benign.csv"))
        Y = 3
        X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
        model = H2OGeneralizedLinearEstimator(family="binomial",
                                              alpha=0,
                                              Lambda=1e-5)
        model.train(x=X, y=Y, training_frame=training_data)
    except AttributeError as e:  # expected on python 2 according to the original comments
        py2_error = e
    finally:
        sys.stdout = saved_stdout  # always undo the redirection, even on unexpected errors

    # The assertions live outside the try block so that a failing check is reported as itself
    # instead of being caught and mistaken for the python 2 error path.
    if py2_error is None:
        output = captured.getvalue()
        # make sure the word progress is found and % is found.  That is how progress is displayed.
        assert ("progress" in output) and (
            "100%" in output), "h2o.show_progress() command is not working."
    else:
        assert "encoding" in py2_error.args[
            0], "h2o.show_progress() command is not working."
        # inspect.getargspec is gone from newer Python 3 releases; prefer getfullargspec when present.
        get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        allargs = get_spec(h2o.show_progress)
        assert len(
            allargs.args) == 0, "h2o.show_progress() should have no arguments!"
Exemplo n.º 5
0
def get_automl(project_name):
    """
    Retrieve information about an AutoML instance.

    :param str project_name:  A string indicating the project_name of the automl instance to retrieve.
    :returns: A dictionary with the keys ``project_name``, ``leader`` and ``leaderboard``. ``leader`` is
        ``None`` when the leaderboard lists no models.
    """
    automl_json = h2o.api("GET /99/AutoML/%s" % project_name)
    project_name = automl_json["project_name"]
    leaderboard_list = [key["name"] for key in automl_json['leaderboard']['models']]

    # The first leaderboard entry is treated as the leader. With an empty leaderboard there is no
    # model id to look up, so skip the h2o.get_model() call instead of passing it None.
    if leaderboard_list:
        leader = h2o.get_model(leaderboard_list[0])
    else:
        leader = None

    # Intentionally mask the progress bar here since showing multiple progress bars is confusing to users.
    # The finally clause reverts to the user's original progress setting whether or not the conversion
    # fails; any error propagates unchanged with its original traceback.
    is_progress = H2OJob.__PROGRESS_BAR__
    h2o.no_progress()
    try:
        # Parse leaderboard H2OTwoDimTable & return as an H2OFrame
        leaderboard = h2o.H2OFrame(
            automl_json["leaderboard_table"].cell_values,
            column_names=automl_json["leaderboard_table"].col_header)
    finally:
        if is_progress is True:
            h2o.show_progress()

    # NOTE(review): the [1:] slice is preserved from the original; confirm what it is meant to drop.
    leaderboard = leaderboard[1:]
    return {'project_name': project_name, "leader": leader, "leaderboard": leaderboard}
Exemplo n.º 6
0
def h2oshow_progress():
    """
    Python API test: h2o.show_progress()

    Trains a small GLM with stdout redirected into a buffer and checks that the captured text contains
    the word "progress" and "100%", which is how a progress bar is displayed.

    If the run raises an AttributeError (the original comments attribute this to Python 2), the test
    instead checks that the error message mentions "encoding" and that h2o.show_progress() takes no
    arguments. Any other exception propagates unchanged.
    """
    captured = StringIO()
    saved_stdout = sys.stdout   # restore whatever stream was active, which may not be sys.__stdout__
    py2_error = None
    sys.stdout = captured   # redirect output
    try:
        h2o.show_progress()   # true by default.
        training_data = h2o.upload_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
        Y = 3
        X = [0, 1, 2, 4, 5, 6, 7, 8, 9, 10]
        model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
        model.train(x=X, y=Y, training_frame=training_data)
    except AttributeError as e:  # expected on python 2 according to the original comments
        py2_error = e
    finally:
        sys.stdout = saved_stdout   # always undo the redirection, even on unexpected errors

    # The assertions live outside the try block so that a failing check is reported as itself
    # instead of being caught and mistaken for the python 2 error path.
    if py2_error is None:
        output = captured.getvalue()
        # make sure the word progress is found and % is found.  That is how progress is displayed.
        assert ("progress" in output) and ("100%" in output), "h2o.show_progress() command is not working."
    else:
        assert "encoding" in py2_error.args[0], "h2o.show_progress() command is not working."
        # inspect.getargspec is gone from newer Python 3 releases; prefer getfullargspec when present.
        get_spec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        allargs = get_spec(h2o.show_progress)
        assert len(allargs.args) == 0, "h2o.show_progress() should have no arguments!"
Exemplo n.º 7
0
# Rewrite, in place, every entry of MOST_IMPORTANT_VARS_ORDERD that also appears in `cats`
# so that it carries the '_Tencode' suffix; all other entries are left untouched.
for i, v in enumerate(MOST_IMPORTANT_VARS_ORDERD):
    if v not in cats:
        continue
    MOST_IMPORTANT_VARS_ORDERD[i] = v + '_Tencode'

#
# print('Combining features....')
# (train, valid, test) = feature_combiner(train, test, MOST_IMPORTANT_VARS_ORDERD, valid_frame = valid, frame_type='spark')
# print('Done combining features.')
#
# encoded_combined_nums, cats = get_type_lists(frame=train,rejects=[ID_VAR,Y],frame_type='spark')

################################################################################
#                 DONE WITH PREPROCESSING - START TRAINING                     #
################################################################################
# Training setup: import the modelling libraries, create the Sparkling Water context and start h2o.
# The statements below are order-dependent (e.g. the matplotlib backend is selected before seaborn
# is imported), so keep their sequence when editing.
import h2o
h2o.show_progress()  # turn on progress bars
from h2o.estimators.glm import H2OGeneralizedLinearEstimator  # import GLM models
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid.grid_search import H2OGridSearch  # grid search
from h2o.estimators.xgboost import H2OXGBoostEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
import xgboost as xgb
import matplotlib
matplotlib.use('Agg')  # Need this if running matplotlib on a server w/o display
from pysparkling import *

# NOTE(review): `spark` is not defined in this part of the script, and H2OConf / H2OContext are
# presumably supplied by the wildcard pysparkling import above -- confirm both.
conf = H2OConf(spark=spark)
conf.nthreads = -1
hc = H2OContext.getOrCreate(spark, conf)
import seaborn as sns

# NOTE(review): several of the imports below repeat ones already made above.
import h2o
from h2o.estimators.glrm import H2OGeneralizedLowRankEstimator
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.grid.grid_search import H2OGridSearch
from h2o.estimators.xgboost import H2OXGBoostEstimator
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator

from h2o.estimators.glm import H2OGeneralizedLinearEstimator  # import GLM models
from h2o.grid.grid_search import H2OGridSearch

#import xgboost as xgb
h2o.init(max_mem_size='12G')  # give h2o as much memory as possible
# NOTE(review): the previous comment here read "turn off h2o progress bars", but this call turns
# them ON (and repeats the earlier call). If silence was intended the call would be
# h2o.no_progress() -- confirm.
h2o.show_progress()  # turn on h2o progress bars

# Definitions
# Print floats with three decimal places; `pd` is presumably pandas imported earlier in the script.
pd.set_option('display.float_format', lambda x: '%.3f' % x)
#%matplotlib inline
#njobs = 4


def get_type_lists(frame, rejects):
    """Creates lists of numeric and categorical variables.
    :param frame: The frame from which to determine types.
    :param rejects: Variable names not to be included in returned lists.
    :return: Tuple of lists for numeric and categorical variables in the frame.
    """

    nums, cats = [], []