Example #1
0
def test_frame_reload():
    """Round-trip an H2O frame through save/load and verify its contents.

    Saves the iris frame to a temp dir, asserts that a non-forced re-save and
    a non-forced load over an existing key both raise H2OResponseError, then
    reloads the frame (forced, and non-forced after removing the original)
    and compares each reload against the original pandas conversion. Finally
    reloads after remove_all and trains a small grid search to confirm the
    restored frame is usable as a training frame.
    """
    work_dir = tempfile.mkdtemp()
    iris = h2o.import_file(
        path=pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    df_key = iris.key
    df_pd_orig = iris.as_data_frame()
    iris.save(work_dir)
    try:
        iris.save(work_dir, force=False)  # must fail: file already exists
        raise AssertionError("expected H2OResponseError: file already exists")
    except H2OResponseError as e:
        assert e.args[0].exception_msg.startswith("File already exists")
    try:
        h2o.load_frame(df_key, work_dir,
                       force=False)  # must fail: frame key already exists
        raise AssertionError("expected H2OResponseError: frame already exists")
    except H2OResponseError as e:
        assert e.args[
            0].exception_msg == "Frame Key<Frame> iris_wheader.hex already exists."
    # Default force=True overwrites the in-memory frame with the saved copy.
    df_loaded_force = h2o.load_frame(df_key, work_dir)
    h2o.remove(iris)
    # Key is gone now, so a non-forced load must succeed.
    df_loaded = h2o.load_frame(df_key, work_dir, force=False)
    df_pd_loaded_force = df_loaded_force.as_data_frame()
    df_pd_loaded = df_loaded.as_data_frame()
    assert df_pd_orig.equals(df_pd_loaded_force)
    assert df_pd_orig.equals(df_pd_loaded)

    # Reload after wiping the cluster and make sure the restored frame can
    # actually be used for model training (grid search over ntrees).
    h2o.remove_all()
    df_loaded = h2o.load_frame(df_key, work_dir)
    hyper_parameters = OrderedDict()
    hyper_parameters["ntrees"] = [5, 10, 20, 30]
    grid_small = H2OGridSearch(H2OGradientBoostingEstimator,
                               hyper_params=hyper_parameters)
    grid_small.train(x=list(range(4)), y=4, training_frame=df_loaded)
    assert len(grid_small.models) == 4
Example #2
0
    def test_frame_reload(self):
        """Save a frame on one H2O cluster, reload it on another, compare.

        Starts a "saver" cluster that imports the MNIST dataset from HDFS and
        saves the frame to the shared work dir, then starts an independent
        "loader" cluster that loads the frame back by key. The pandas
        conversions of the original and reloaded frames must be equal.
        """
        name_node = utils.hadoop_namenode()
        work_dir = utils.get_workdir()
        dataset = "/datasets/mnist/train.csv.gz"

        saver_cluster_name = "saver-py"
        try:
            cluster_1 = utils.start_cluster(saver_cluster_name)
            h2o.connect(url=cluster_1)
            try:
                df_orig = h2o.import_file(path="hdfs://%s%s" %
                                          (name_node, dataset))
                df_key = df_orig.key
                df_pd_orig = df_orig.as_data_frame()
                df_orig.save(work_dir)
            finally:
                # Close even when import/save fails; only reached once
                # connect() has succeeded, so a connection always exists here.
                h2o.connection().close()
        finally:
            utils.stop_cluster(saver_cluster_name)

        loader_cluster_name = "loader-py"
        try:
            cluster_2 = utils.start_cluster(loader_cluster_name)
            h2o.connect(url=cluster_2)
            try:
                df_loaded = h2o.load_frame(df_key, work_dir)
                df_pd_loaded = df_loaded.as_data_frame()
            finally:
                # Same guarantee for the loader cluster's connection.
                h2o.connection().close()
        finally:
            utils.stop_cluster(loader_cluster_name)

        self.assertTrue(df_pd_orig.equals(df_pd_loaded))
Example #3
0
def test_frame_reload():
    """Round-trip an H2O frame through save/load and verify its contents.

    Saves the iris frame to a temp dir, asserts that a non-forced re-save and
    a non-forced load over an existing key both raise H2OResponseError, then
    reloads the frame (forced, and non-forced after removing the original)
    and compares each reload against the original pandas conversion.
    """
    work_dir = tempfile.mkdtemp()
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))
    df_key = iris.key
    df_pd_orig = iris.as_data_frame()
    iris.save(work_dir)
    try:
        iris.save(work_dir, force=False)  # must fail: file already exists
        raise AssertionError("expected H2OResponseError: file already exists")
    except H2OResponseError as e:
        assert e.args[0].exception_msg.startswith("File already exists")
    try:
        h2o.load_frame(df_key, work_dir, force=False)  # must fail: frame key exists
        raise AssertionError("expected H2OResponseError: frame already exists")
    except H2OResponseError as e:
        assert e.args[0].exception_msg == "Frame Key<Frame> iris_wheader.hex already exists."
    # Default force=True overwrites the in-memory frame with the saved copy.
    df_loaded_force = h2o.load_frame(df_key, work_dir)
    h2o.remove(iris)
    # Key is gone now, so a non-forced load must succeed.
    df_loaded = h2o.load_frame(df_key, work_dir, force=False)
    df_pd_loaded_force = df_loaded_force.as_data_frame()
    df_pd_loaded = df_loaded.as_data_frame()
    assert df_pd_orig.equals(df_pd_loaded_force)
    assert df_pd_orig.equals(df_pd_loaded)