def setUpClass(cls):
    cls._conf = unit_test_utils.get_default_spark_conf(cls._spark_options_from_params)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark, H2OConf(cls._spark).set_cluster_size(1))
def setUpClass(cls):
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf()).getOrCreate()
    unit_test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(
        cls._spark, H2OConf(cls._spark).set_num_of_external_h2o_nodes(2))
def createH2OConf(): conf = H2OConf() conf.setClusterSize(1) conf.useAutoClusterStart() conf.setExternalClusterMode() conf.setLogLevel("INFO") return conf
def setUpClass(cls):
    cls._conf = unit_test_utils.get_default_spark_conf(cls._spark_options_from_params)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark, H2OConf(cls._spark).set_num_of_external_h2o_nodes(1))
def createH2OConf(): conf = H2OConf() conf.setClusterSize(1) conf.set("spark.ext.h2o.rest.api.based.client", "true") conf.useAutoClusterStart() conf.setExternalClusterMode() return conf
def setUpClass(cls):
    cls._spark = SparkSession.builder.config(
        conf=test_utils.get_default_spark_conf().set(
            "spark.ext.h2o.cloud.name", "test-cloud")).getOrCreate()
    test_utils.set_up_class(cls)
    h2o_conf = H2OConf(cls._spark).set_num_of_external_h2o_nodes(2)
    cls._hc = H2OContext.getOrCreate(cls._spark, h2o_conf)
def getOrCreate(spark_context, conf=None): """ Get existing or create new H2OContext based on provided H2O configuration. If the conf parameter is set then configuration from it is used. Otherwise the configuration properties passed to Sparkling Water are used. If the values are not found the default values are used in most of the cases. The default cluster mode is internal, ie. spark.ext.h2o.external.cluster.mode=false param - Spark Context returns H2O Context """ h2o_context = H2OContext(spark_context) jvm = h2o_context._jvm # JVM jsc = h2o_context._jsc # JavaSparkContext if conf is not None: selected_conf = conf else: selected_conf = H2OConf(spark_context) # Create H2OContext jhc = jvm.org.apache.spark.h2o.JavaH2OContext.getOrCreate( jsc, selected_conf._jconf) h2o_context._jhc = jhc h2o_context._conf = selected_conf h2o_context._client_ip = jhc.h2oLocalClientIp() h2o_context._client_port = jhc.h2oLocalClientPort() # Create H2O REST API client h2o.connect(ip=h2o_context._client_ip, port=h2o_context._client_port) h2o_context.is_initialized = True return h2o_context
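# Usage sketch for the SparkContext-based entry point above. This is a minimal,
# hypothetical example assuming the pysparkling package layout and a local Spark
# master; the application name is a placeholder, not taken from the source.
from pyspark import SparkConf, SparkContext
from pysparkling import H2OConf, H2OContext

sc = SparkContext(conf=SparkConf().setAppName("h2o-context-example").setMaster("local[*]"))

# Passing an explicit H2OConf is optional; without it the spark.ext.h2o.*
# properties on the Spark configuration drive the setup.
hc = H2OContext.getOrCreate(sc, H2OConf(sc))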
def createH2OConf(spark):
    conf = H2OConf(spark)
    conf.set_cluster_size(1)
    conf.set("spark.ext.h2o.rest.api.based.client", "true")
    conf.use_auto_cluster_start()
    conf.set_external_cluster_mode()
    conf.set_h2o_node_web_enabled()
    return conf
def setUpClass(cls): cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test") cls._spark = SparkSession.builder.config( conf=unit_test_utils.get_default_spark_conf().set( "spark.ext.h2o.cloud.name", cls._cloud_name)).getOrCreate() unit_test_utils.set_up_class(cls) h2o_conf = H2OConf(cls._spark).set_num_of_external_h2o_nodes(2) cls._hc = H2OContext.getOrCreate(cls._spark, h2o_conf)
def setUpClass(cls): cls._cloud_name = generic_test_utils.unique_cloud_name("h2o_conf_test") cls._conf = unit_test_utils.get_default_spark_conf(cls._spark_options_from_params). \ set("spark.ext.h2o.cloud.name", cls._cloud_name) cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate() cls._hc = H2OContext.getOrCreate( cls._spark, H2OConf(cls._spark).set_cluster_size(1))
def setUpClass(cls):
    cls._spark = SparkSession.builder.config(
        conf=unit_test_utils.get_default_spark_conf().setMaster("yarn-client")).getOrCreate()
    unit_test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(
        cls._spark, H2OConf(cls._spark).set_cluster_size(1))
def setUpClass(cls):
    cls._conf = unit_test_utils.get_default_spark_conf(cls._spark_options_from_params)
    cls._spark = SparkSession.builder.config(conf=cls._conf).getOrCreate()
    cls._hc = H2OContext.getOrCreate(
        cls._spark, H2OConf(cls._spark).set_cluster_size(1))
    cls.dataset = cls._spark.read.csv(
        "file://" + unit_test_utils.locate("smalldata/iris/iris_wheader.csv"),
        header=True, inferSchema=True)
def getOrCreate(spark, conf=None, verbose=True, **kwargs):
    """
    Get an existing or create a new H2OContext based on the provided H2O configuration.

    If the conf parameter is set, the configuration from it is used. Otherwise,
    the configuration properties passed to Sparkling Water are used. If values
    are not found, the defaults are used in most cases. The default cluster mode
    is internal, i.e. spark.ext.h2o.external.cluster.mode=false.

    :param spark: Spark Context or Spark Session
    :return: H2O Context
    """
    spark_session = spark
    if isinstance(spark, SparkContext):
        warnings.warn("Method H2OContext.getOrCreate with argument of type SparkContext is deprecated and "
                      "parameter of type SparkSession is preferred.")
        spark_session = SparkSession.builder.getOrCreate()

    h2o_context = H2OContext(spark_session)

    jvm = h2o_context._jvm  # JVM
    jspark_session = h2o_context._jspark_session  # Java Spark Session

    if conf is not None:
        selected_conf = conf
    else:
        selected_conf = H2OConf(spark_session)

    # Create backing Java H2OContext
    jhc = jvm.org.apache.spark.h2o.JavaH2OContext.getOrCreate(jspark_session, selected_conf._jconf)
    h2o_context._jhc = jhc
    h2o_context._conf = selected_conf
    h2o_context._client_ip = jhc.h2oLocalClientIp()
    h2o_context._client_port = jhc.h2oLocalClientPort()

    # Create H2O REST API client
    h2o.connect(ip=h2o_context._client_ip, port=h2o_context._client_port, verbose=verbose, **kwargs)

    h2o_context.is_initialized = True
    if verbose:
        print(h2o_context)

    # Stop H2O when running standalone PySparkling scripts, but only in client deploy mode,
    # so the user does not need to explicitly close H2O.
    # In driver mode the application would call exit, which the Spark AM handles as a failure.
    deploy_mode = spark_session.sparkContext._conf.get("spark.submit.deployMode")
    if deploy_mode != "cluster":
        atexit.register(lambda: h2o_context.__stop())

    return h2o_context
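# Usage sketch for the SparkSession-based entry point above. This is a minimal,
# hypothetical example assuming the pysparkling package layout; the app name,
# master, and cluster size are placeholders chosen to mirror the test fixtures
# in this section, not values from the source.
from pyspark.sql import SparkSession
from pysparkling import H2OConf, H2OContext

spark = SparkSession.builder \
    .appName("h2o-context-example") \
    .master("local[*]") \
    .getOrCreate()

# verbose=True (the default) prints the H2O client endpoint after connecting.
hc = H2OContext.getOrCreate(spark, H2OConf(spark).set_cluster_size(1), verbose=True)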
def getOrCreate(spark_context, conf=None): """ Get existing or create new H2OContext based on provided H2O configuration. If the conf parameter is set then configuration from it is used. Otherwise the configuration properties passed to Sparkling Water are used. If the values are not found the default values are used in most of the cases. The default cluster mode is internal, ie. spark.ext.h2o.external.cluster.mode=false param - Spark Context returns H2O Context """ h2o_context = H2OContext(spark_context) jvm = h2o_context._jvm # JVM gw = h2o_context._gw # Py4J Gateway jsc = h2o_context._jsc # JavaSparkContext # Imports Sparkling Water into current JVM view # We cannot use directly Py4j to import Sparkling Water packages # java_import(sc._jvm, "org.apache.spark.h2o.*") # because of https://issues.apache.org/jira/browse/SPARK-5185 # So lets load class directly via classloader # This is finally fixed in Spark 2.0 ( along with other related issues) # Call the corresponding getOrCreate method jhc_klazz = jvm.java.lang.Thread.currentThread().getContextClassLoader( ).loadClass("org.apache.spark.h2o.JavaH2OContext") conf_klazz = jvm.java.lang.Thread.currentThread( ).getContextClassLoader().loadClass("org.apache.spark.h2o.H2OConf") method_def = gw.new_array(jvm.Class, 2) method_def[0] = jsc.getClass() method_def[1] = conf_klazz method = jhc_klazz.getMethod("getOrCreate", method_def) method_params = gw.new_array(jvm.Object, 2) method_params[0] = jsc if conf is not None: selected_conf = conf else: selected_conf = H2OConf(spark_context) method_params[1] = selected_conf._jconf jhc = method.invoke(None, method_params) h2o_context._jhc = jhc h2o_context._conf = selected_conf h2o_context._client_ip = jhc.h2oLocalClientIp() h2o_context._client_port = jhc.h2oLocalClientPort() h2o.init(ip=h2o_context._client_ip, port=h2o_context._client_port, start_h2o=False, strict_version_check=False) return h2o_context
def getOrCreate(spark, conf=None, **kwargs):
    """
    Get an existing or create a new H2OContext based on the provided H2O configuration.

    If the conf parameter is set, the configuration from it is used. Otherwise,
    the configuration properties passed to Sparkling Water are used. If values
    are not found, the defaults are used in most cases. The default cluster mode
    is internal, i.e. spark.ext.h2o.external.cluster.mode=false.

    :param spark: Spark Context or Spark Session
    :return: H2O Context
    """
    spark_session = spark
    if isinstance(spark, SparkContext):
        warnings.warn("Method H2OContext.getOrCreate with argument of type SparkContext is deprecated and "
                      "parameter of type SparkSession is preferred.")
        spark_session = SparkSession.builder.getOrCreate()

    h2o_context = H2OContext(spark_session)

    jvm = h2o_context._jvm  # JVM
    jsc = h2o_context._jsc  # JavaSparkContext

    if conf is not None:
        selected_conf = conf
    else:
        selected_conf = H2OConf(spark_session)

    # Create backing Java H2OContext
    jhc = jvm.org.apache.spark.h2o.JavaH2OContext.getOrCreate(jsc, selected_conf._jconf)
    h2o_context._jhc = jhc
    h2o_context._conf = selected_conf
    h2o_context._client_ip = jhc.h2oLocalClientIp()
    h2o_context._client_port = jhc.h2oLocalClientPort()

    # Create H2O REST API client
    h2o.connect(ip=h2o_context._client_ip, port=h2o_context._client_port, **kwargs)

    h2o_context.is_initialized = True

    # Stop H2O when running standalone PySparkling scripts so the user does not
    # have to explicitly close H2O.
    atexit.register(lambda: h2o_context.stop_with_jvm())
    return h2o_context
def setUpClass(cls):
    cls._sc = SparkContext(conf=test_utils.get_default_spark_conf())
    test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(
        cls._sc, H2OConf(cls._sc).set_num_of_external_h2o_nodes(2))
def setUpClass(cls):
    cls._sc = SparkContext(conf=test_utils.get_default_spark_conf().set(
        "spark.ext.h2o.cloud.name", "test-cloud"))
    test_utils.set_up_class(cls)
    h2o_conf = H2OConf(cls._sc).set_num_of_external_h2o_nodes(2)
    cls._hc = H2OContext.getOrCreate(cls._sc, h2o_conf)
def hc(spark):
    return H2OContext.getOrCreate(H2OConf().setClusterSize(1))
def hc(spark):
    return H2OContext.getOrCreate(spark, H2OConf(spark).set_cluster_size(1))
def setUpClass(cls):
    cls._sc = SparkContext(conf=test_utils.get_default_spark_conf())
    test_utils.set_up_class(cls)
    cls._hc = H2OContext.getOrCreate(cls._sc, H2OConf(cls._sc))