Exemplo n.º 1
0
Arquivo: h2o.py Projeto: feijoas/h2o-3
def init(url=None, ip=None, port=None, https=None, insecure=False, username=None, password=None, cluster_name=None,
         proxy=None, start_h2o=True, nthreads=-1, ice_root=None, enable_assertions=True,
         max_mem_size=None, min_mem_size=None, strict_version_check=True, **kwargs):
    """
    Try to connect to an H2O server; if that fails, optionally start a local server and connect to it.

    The connection attempt goes through ``connect()``. When it raises ``H2OConnectionError`` and
    ``start_h2o`` is true, a local server is launched with ``H2OLocalServer.start()`` and the module-level
    ``h2oconn`` is set to a connection opened against it.

    :param url: forwarded to ``connect()``.
    :param ip: forwarded to ``connect()``. A value other than "localhost" / "127.0.0.1" combined with
        ``start_h2o`` triggers a warning.
    :param port: forwarded to ``connect()``. If a local server has to be started, a "+" suffix is appended
        (when missing) before it is passed to ``H2OLocalServer.start()``.
    :param https: if truthy, the "https" scheme is used when selecting a proxy; also forwarded to the
        connection calls.
    :param insecure: negated and passed on as ``verify_ssl_certificates``.
    :param username: used together with ``password`` to build the ``auth`` tuple (only if both are truthy).
    :param password: see ``username``.
    :param cluster_name: forwarded to the connection calls.
    :param proxy: container indexed by scheme ("http" / "https"); the entry for the active scheme is used.
    :param start_h2o: if false, a failed connection attempt is re-raised instead of starting a local server.
    :param nthreads: forwarded to ``H2OLocalServer.start()``.
    :param ice_root: forwarded to ``H2OLocalServer.start()``.
    :param enable_assertions: forwarded to ``H2OLocalServer.start()``.
    :param max_mem_size: converted with ``int()`` and forwarded to ``H2OLocalServer.start()``.
    :param min_mem_size: converted with ``int()`` and forwarded to ``H2OLocalServer.start()``.
    :param strict_version_check: if truthy, ``version_check()`` is called once a connection exists.
    :param kwargs: deprecated attributes; the ones read here are ``proxies``, ``max_mem_size_GB`` and
        ``min_mem_size_GB`` (the latter two are shifted left by 30 bits, i.e. GB -> bytes).
    :returns: nothing
    """
    # NOTE(review): on the successful ``connect()`` path ``h2oconn`` is presumably set by ``connect()``
    # itself -- that function is not visible here.
    global h2oconn

    scheme = "https" if https else "http"

    # An explicit ``proxy`` wins over the deprecated ``proxies`` keyword.
    if proxy is not None and scheme in proxy:
        proxy = proxy[scheme]
    elif "proxies" in kwargs and scheme in kwargs["proxies"]:
        proxy = kwargs["proxies"][scheme]
    else:
        proxy = None

    # Explicit memory limits win over the deprecated ``*_GB`` keywords.
    if max_mem_size is not None:
        mmax = int(max_mem_size)
    elif "max_mem_size_GB" in kwargs:
        mmax = kwargs["max_mem_size_GB"] << 30
    else:
        mmax = None

    if min_mem_size is not None:
        mmin = int(min_mem_size)
    elif "min_mem_size_GB" in kwargs:
        mmin = kwargs["min_mem_size_GB"] << 30
    else:
        mmin = None

    auth = None
    if username and password:
        auth = (username, password)

    if start_h2o:
        if ip and ip not in ("localhost", "127.0.0.1"):
            print("Warning: connecting to remote server but falling back to local... Did you mean to use `h2o.connect()`?")
    else:
        print("Warning: if you don't want to start local H2O server, then use of `h2o.connect()` is preferred.")

    verify_ssl = not insecure
    try:
        connect(url=url, ip=ip, port=port, https=https, verify_ssl_certificates=verify_ssl, auth=auth,
                proxy=proxy, cluster_name=cluster_name, verbose=True)
    except H2OConnectionError:
        # Backward compatibility: in init() the port parameter really meant "baseport" when starting a
        # local server, which is expressed by a trailing "+".
        if port:
            port_text = str(port)
            if not port_text.endswith("+"):
                port = port_text + "+"
        if not start_h2o:
            raise
        local_server = H2OLocalServer.start(nthreads=nthreads, enable_assertions=enable_assertions,
                                            max_mem_size=mmax, min_mem_size=mmin, ice_root=ice_root,
                                            port=port)
        h2oconn = H2OConnection.open(server=local_server, https=https, verify_ssl_certificates=verify_ssl,
                                     auth=auth, proxy=proxy, cluster_name=cluster_name, verbose=True)

    if strict_version_check:
        version_check()

    if h2oconn.info().build_too_old:
        print("Warning: Your H2O cluster version is too old ({})! Please download and install the latest version from http://h2o.ai/download/".format(h2oconn.info().build_age))
Exemplo n.º 2
0
def mojo_predict_csv(input_csv_path, mojo_zip_path, output_csv_path=None, genmodel_jar_path=None, classpath=None, java_options=None, verbose=False):
    """
    MOJO scoring function to take a CSV file and use MOJO model as zip file to score.

    The scoring itself is done by a Java subprocess; the predictions it writes to ``output_csv_path`` are then
    read back and returned.

    :param input_csv_path: Path to input CSV file.
    :param mojo_zip_path: Path to MOJO zip downloaded from H2O.
    :param output_csv_path: Optional, name of the output CSV file with computed predictions. If None (default), then
        predictions will be saved as prediction.csv in the same folder as the MOJO zip.
    :param genmodel_jar_path: Optional, path to genmodel jar file. If None (default) then the h2o-genmodel.jar in the same
        folder as the MOJO zip will be used.
    :param classpath: Optional, specifies custom user defined classpath which will be used when scoring. If None
        (default) then the genmodel jar path is used as the classpath.
    :param java_options: Optional, custom user defined options for Java, separated by whitespace. By default
        '-Xmx4g -XX:ReservedCodeCacheSize=256m' is used.
    :param verbose: Optional, if True, then additional debug information will be printed. False by default.
    :return: List of computed predictions, one dict per row of the output CSV (as produced by ``csv.DictReader``).
    :raises RuntimeError: if the input CSV, the MOJO zip or the genmodel jar is not an existing file.
    :raises subprocess.CalledProcessError: if the Java process exits with a non-zero status.
    """
    default_java_options = '-Xmx4g -XX:ReservedCodeCacheSize=256m'
    prediction_output_file = 'prediction.csv'

    # Checking java
    java = H2OLocalServer._find_java()
    H2OLocalServer._check_java(java=java, verbose=verbose)

    # Ensure input_csv exists
    if verbose:
        print("input_csv:\t%s" % input_csv_path)
    if not os.path.isfile(input_csv_path):
        raise RuntimeError("Input csv cannot be found at %s" % input_csv_path)

    # Ensure mojo_zip exists
    mojo_zip_path = os.path.abspath(mojo_zip_path)
    if verbose:
        print("mojo_zip:\t%s" % mojo_zip_path)
    if not os.path.isfile(mojo_zip_path):
        raise RuntimeError("MOJO zip cannot be found at %s" % mojo_zip_path)

    parent_dir = os.path.dirname(mojo_zip_path)

    # Set output_csv if necessary
    if output_csv_path is None:
        output_csv_path = os.path.join(parent_dir, prediction_output_file)

    # Set path to h2o-genmodel.jar if necessary and check it's valid
    if genmodel_jar_path is None:
        genmodel_jar_path = os.path.join(parent_dir, gen_model_file_name)
    if verbose:
        print("genmodel_jar:\t%s" % genmodel_jar_path)
    if not os.path.isfile(genmodel_jar_path):
        raise RuntimeError("Genmodel jar cannot be found at %s" % genmodel_jar_path)

    # output_csv_path is always set at this point (defaulted above)
    if verbose:
        print("output_csv:\t%s" % output_csv_path)

    # Set classpath if necessary
    if classpath is None:
        classpath = genmodel_jar_path
    if verbose:
        print("classpath:\t%s" % classpath)

    # Set java_options if necessary
    if java_options is None:
        java_options = default_java_options
    if verbose:
        print("java_options:\t%s" % java_options)

    # Construct command to invoke java.
    # split() without an argument collapses runs of whitespace and ignores leading/trailing whitespace, so
    # no empty-string arguments end up on the java command line (split(' ') would produce them).
    cmd = [java]
    cmd.extend(java_options.split())
    cmd += ["-cp", classpath, h2o_predictor_class, "--mojo", mojo_zip_path, "--input", input_csv_path,
            '--output', output_csv_path, '--decimal']
    if verbose:
        cmd_str = " ".join(cmd)
        print("java cmd:\t%s" % cmd_str)

    # invoke the command
    subprocess.check_call(cmd, shell=False)

    # load predictions in form of a dict
    with open(output_csv_path) as csv_file:
        result = list(csv.DictReader(csv_file))
    return result
Exemplo n.º 3
0
def mojo_predict_csv(input_csv_path,
                     mojo_zip_path,
                     output_csv_path=None,
                     genmodel_jar_path=None,
                     classpath=None,
                     java_options=None,
                     verbose=False,
                     setInvNumNA=False,
                     predict_contributions=False):
    """
    MOJO scoring function to take a CSV file and use MOJO model as zip file to score.

    The scoring itself is done by a Java subprocess; the predictions it writes to ``output_csv_path`` are then
    read back and returned.

    :param input_csv_path: Path to input CSV file.
    :param mojo_zip_path: Path to MOJO zip downloaded from H2O.
    :param output_csv_path: Optional, name of the output CSV file with computed predictions. If None (default), then
        predictions will be saved as prediction.csv in the same folder as the MOJO zip.
    :param genmodel_jar_path: Optional, path to genmodel jar file. If None (default) then the h2o-genmodel.jar in the same
        folder as the MOJO zip will be used.
    :param classpath: Optional, specifies custom user defined classpath which will be used when scoring. If None
        (default) then the genmodel jar path is used as the classpath.
    :param java_options: Optional, custom user defined options for Java, separated by whitespace. By default
        ``-Xmx4g -XX:ReservedCodeCacheSize=256m`` is used.
    :param verbose: Optional, if True, then additional debug information will be printed. False by default.
    :param setInvNumNA: Optional, if True, the ``--setConvertInvalidNum`` flag is passed to the predictor
        (presumably so that invalid numbers are treated as NA -- confirm against the predictor's options).
        False by default.
    :param predict_contributions: if True, then return prediction contributions instead of regular predictions
        (only for tree-based models).
    :return: List of computed predictions, one dict per row of the output CSV (as produced by ``csv.DictReader``).
    :raises RuntimeError: if the input CSV, the MOJO zip or the genmodel jar is not an existing file.
    :raises subprocess.CalledProcessError: if the Java process exits with a non-zero status.
    """
    default_java_options = '-Xmx4g -XX:ReservedCodeCacheSize=256m'
    prediction_output_file = 'prediction.csv'

    # Checking java
    java = H2OLocalServer._find_java()
    H2OLocalServer._check_java(java=java, verbose=verbose)

    # Ensure input_csv exists
    if verbose:
        print("input_csv:\t%s" % input_csv_path)
    if not os.path.isfile(input_csv_path):
        raise RuntimeError("Input csv cannot be found at %s" % input_csv_path)

    # Ensure mojo_zip exists
    mojo_zip_path = os.path.abspath(mojo_zip_path)
    if verbose:
        print("mojo_zip:\t%s" % mojo_zip_path)
    if not os.path.isfile(mojo_zip_path):
        raise RuntimeError("MOJO zip cannot be found at %s" % mojo_zip_path)

    parent_dir = os.path.dirname(mojo_zip_path)

    # Set output_csv if necessary
    if output_csv_path is None:
        output_csv_path = os.path.join(parent_dir, prediction_output_file)

    # Set path to h2o-genmodel.jar if necessary and check it's valid
    if genmodel_jar_path is None:
        genmodel_jar_path = os.path.join(parent_dir, gen_model_file_name)
    if verbose:
        print("genmodel_jar:\t%s" % genmodel_jar_path)
    if not os.path.isfile(genmodel_jar_path):
        raise RuntimeError("Genmodel jar cannot be found at %s" %
                           genmodel_jar_path)

    # output_csv_path is always set at this point (defaulted above)
    if verbose:
        print("output_csv:\t%s" % output_csv_path)

    # Set classpath if necessary
    if classpath is None:
        classpath = genmodel_jar_path
    if verbose:
        print("classpath:\t%s" % classpath)

    # Set java_options if necessary
    if java_options is None:
        java_options = default_java_options
    if verbose:
        print("java_options:\t%s" % java_options)

    # Construct command to invoke java.
    # split() without an argument collapses runs of whitespace and ignores leading/trailing whitespace, so
    # no empty-string arguments end up on the java command line (split(' ') would produce them).
    cmd = [java]
    cmd.extend(java_options.split())
    cmd += [
        "-cp", classpath, h2o_predictor_class, "--mojo", mojo_zip_path,
        "--input", input_csv_path, '--output', output_csv_path, '--decimal'
    ]

    # Optional predictor flags
    if setInvNumNA:
        cmd.append('--setConvertInvalidNum')

    if predict_contributions:
        cmd.append('--predictContributions')

    if verbose:
        cmd_str = " ".join(cmd)
        print("java cmd:\t%s" % cmd_str)

    # invoke the command
    subprocess.check_call(cmd, shell=False)

    # load predictions in form of a dict
    with open(output_csv_path) as csv_file:
        result = list(csv.DictReader(csv_file))
    return result
Exemplo n.º 4
0
    def test_numeric_response_error(self):
        """
        Exercise ``H2OLocalServer._has_compatible_version`` with a table of Java version banners.

        Banners in the first table must raise ``H2OStartupError`` whose text contains the paired message;
        banners in the second table must be accepted without raising.
        """
        # (version banner, text expected inside the raised error)
        # NOTE(review): the 1.7.0_121 case appears twice; kept as-is to preserve the original sequence.
        must_fail = [
            (
                'java version "1.6.0_45"\n'
                'Java(TM) SE Runtime Environment (build 1.6.0_45-b06)\n'
                'Java HotSpot(TM) 64-Bit Server VM (build 20.45-b01, mixed mode)\n',
                'Your java is not supported: java version "1.6.0_45"; '
                'Java(TM) SE Runtime Environment (build 1.6.0_45-b06); '
                'Java HotSpot(TM) 64-Bit Server VM (build 20.45-b01, mixed mode)',
            ),
            (
                'java version "1.6.0_45"\n'
                'Java(TM) SE Runtime Environment (build 1.6.0_45-b06)\n'
                'Oracle JRockit(R) (build R28.2.7-7-155314-1.6.0_45-20130329-0641-linux-x86_64, compiled mode)\n',
                'Your java is not supported: java version "1.6.0_45"; '
                'Java(TM) SE Runtime Environment (build 1.6.0_45-b06); '
                'Oracle JRockit(R) (build R28.2.7-7-155314-1.6.0_45-20130329-0641-linux-x86_64, compiled mode)',
            ),
            (
                'java version "1.6.0_38"\n'
                'OpenJDK Runtime Environment (IcedTea6 1.13.10) (6b38-1.13.10-1~deb7u1)\n'
                'OpenJDK 64-Bit Server VM (build 23.25-b01, mixed mode)\n',
                'Your java is not supported: java version "1.6.0_38"; '
                'OpenJDK Runtime Environment (IcedTea6 1.13.10) (6b38-1.13.10-1~deb7u1); '
                'OpenJDK 64-Bit Server VM (build 23.25-b01, mixed mode)',
            ),
            (
                'java version "1.7.0_121"\n'
                'OpenJDK Runtime Environment (IcedTea 2.6.8) (Alpine 7.121.2.6.8-r0)\n'
                'OpenJDK 64-Bit Server VM (build 24.121-b00, mixed mode)\n',
                'Your java is not supported: java version "1.7.0_121"; '
                'OpenJDK Runtime Environment (IcedTea 2.6.8) (Alpine 7.121.2.6.8-r0); '
                'OpenJDK 64-Bit Server VM (build 24.121-b00, mixed mode)',
            ),
            (
                'java version "1.7.0_111"\n'
                'OpenJDK Runtime Environment (IcedTea 2.6.7) (7u111-2.6.7-2~deb8u1)\n'
                'OpenJDK 64-Bit Server VM (build 24.111-b01, mixed mode)\n',
                'Your java is not supported: java version "1.7.0_111"; '
                'OpenJDK Runtime Environment (IcedTea 2.6.7) (7u111-2.6.7-2~deb8u1); '
                'OpenJDK 64-Bit Server VM (build 24.111-b01, mixed mode)',
            ),
            (
                'java version "1.7.0_121"\n'
                'OpenJDK Runtime Environment (IcedTea 2.6.8) (Alpine 7.121.2.6.8-r0)\n'
                'OpenJDK 64-Bit Server VM (build 24.121-b00, mixed mode)\n',
                'Your java is not supported: java version "1.7.0_121"; '
                'OpenJDK Runtime Environment (IcedTea 2.6.8) (Alpine 7.121.2.6.8-r0); '
                'OpenJDK 64-Bit Server VM (build 24.121-b00, mixed mode)',
            ),
        ]

        for banner, expected_text in must_fail:
            with self.assertRaises(H2OStartupError) as err:
                H2OLocalServer._has_compatible_version(banner)
            assert expected_text in str(err.exception)

        # Version banners that must be accepted.
        # NOTE(review): the Oracle 1.8.0_181 case appears twice; kept as-is to preserve the original sequence.
        must_pass = [
            (
                'java version "13.0.1" 2019-10-15\n'
                'Java(TM) SE Runtime Environment (build 13.0.1+9)\n'
                'Java HotSpot(TM) 64-Bit Server VM (build 13.0.1+9, mixed mode, sharing)\n'
            ),
            (
                'java version "1.8.0_181"\n'
                'Java(TM) SE Runtime Environment (build 1.8.0_181-b13)\n'
                'Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)\n'
            ),
            (
                'java version "1.8.0_181"\n'
                'Java(TM) SE Runtime Environment (build 1.8.0_181-b13)\n'
                'Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)\n'
            ),
            (
                'java version "10.0.2" 2018-07-17"\n'
                'Java(TM) SE Runtime Environment 18.3 (build 10.0.2+13)\n'
                'Java HotSpot(TM) 64-Bit Server VM 18.3 (build 10.0.2+13, mixed mode)\n'
            ),
            (
                'openjdk version "1.8.0_181"\n'
                'OpenJDK Runtime Environment (build 1.8.0_181-8u181-b13-1~deb9u1-b13)\n'
                'OpenJDK 64-Bit Server VM (build 25.181-b13, mixed mode)\n'
            ),
            (
                'openjdk version "9.0.4""\n'
                'OpenJDK Runtime Environment (build 9.0.4+12-Debian-4)\n'
                'OpenJDK 64-Bit Server VM (build 9.0.4+12-Debian-4, mixed mode)\n'
            ),
            (
                'openjdk version "10.0.2" 2018-07-17"\n'
                'OpenJDK Runtime Environment (build 10.0.2+13-Debian-1)\n'
                'OpenJDK 64-Bit Server VM (build 10.0.2+13-Debian-1, mixed mode)\n'
            ),
            (
                'openjdk version "11" 2018-09-25\n'
                'OpenJDK Runtime Environment (build 11+24-Debian-1)\n'
                'OpenJDK 64-Bit Server VM (build 11+24-Debian-1, mixed mode, sharing)\n'
            ),
        ]

        for banner in must_pass:
            H2OLocalServer._has_compatible_version(banner)