def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
          ice_root=None, port="54321+", verbose=True):
    """
    Start new H2O server on the local machine.

    :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
        locations returned by `._jar_paths()`.
    :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
        -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
    :param enable_assertions: If True, pass `-ea` option to the JVM.
    :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
    :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
    :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
        tempfile.mkdtemp().
    :param port: Port where to start the new server. This could be either an integer, or a string of the form
        "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
        ``None`` is treated as "54321+".
    :param verbose: If True, then connection info will be printed to the stdout.

    :returns: a new H2OLocalServer instance
    :raises H2OValueError: if ``min_mem_size`` exceeds ``max_mem_size``, or if ``port`` is a string that is
        neither all digits nor digits followed by "+".
    """
    assert_is_type(jar_path, None, str)
    assert_is_type(port, None, int, str)
    assert_is_type(nthreads, -1, BoundInt(1, 4096))
    assert_is_type(enable_assertions, bool)
    assert_is_type(min_mem_size, None, int)
    assert_is_type(max_mem_size, None, BoundInt(1 << 25))
    assert_is_type(ice_root, None, I(str, os.path.isdir))
    if jar_path:
        assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

    if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
        raise H2OValueError("`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
    if port is None:
        port = "54321+"
    baseport = None
    # TODO: get rid of this port gimmick and have 2 separate parameters.
    if is_type(port, str):
        if port.isdigit():
            port = int(port)
        else:
            # `endswith` (rather than `port[-1]`) keeps an empty string from raising IndexError, so that
            # every malformed value is reported through the same H2OValueError.
            if not (port.endswith("+") and port[:-1].isdigit()):
                raise H2OValueError("`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
            baseport = int(port[:-1])
            port = 0  # an exact port of 0 together with a baseport requests the "search upwards" mode

    hs = H2OLocalServer()
    hs._verbose = bool(verbose)
    hs._jar_path = hs._find_jar(jar_path)
    hs._ice_root = ice_root
    if not ice_root:
        # No directory supplied: create a temporary one and remember it on the server object.
        hs._ice_root = tempfile.mkdtemp()
        hs._tempdir = hs._ice_root

    if verbose:
        print("Attempting to start a local H2O server...")
    hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                      mmax=max_mem_size, mmin=min_mem_size)
    if verbose:
        print(" Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
    # Make sure the launched server is shut down when the interpreter exits.
    atexit.register(lambda: hs.shutdown())
    return hs
def request(self, endpoint, data=None, json=None, filename=None):
    """
    Send a single REST API request to the backend H2O server.

    :param endpoint: (str) HTTP method and path separated by one space, for example "GET /4/schemas/KeyV4".
    :param data: payload for POST (and sometimes GET) requests: a dictionary of simple key/value pairs
        (values can also be arrays), sent in x-www-form-encoded format. For GET requests the payload is
        sent as query parameters instead.
    :param json: alternative payload that is sent as a JSON body. Cannot be used together with `data`.
    :param filename: file to upload to the server (POST only). Cannot be used with `data` or `json`.

    :returns: an H2OResponse object representing the server's response
    :raises H2OConnectionError: if the H2O server cannot be reached (or connection is not initialized)
    :raises H2OServerError: if there was a server error (http 500), or server returned malformed JSON
    :raises H2OResponseError: if the server returned an H2OErrorV3 response (e.g. if the parameters were invalid)
    """
    # A connection at stage 0 was never opened, and at stage -1 it has already been closed.
    if self._stage == 0:
        raise H2OConnectionError("Connection not initialized; run .connect() first.")
    if self._stage == -1:
        raise H2OConnectionError("Connection was closed, and can no longer be used.")

    # Prepare URL
    assert_is_type(endpoint, str)
    match = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD) (/.*)$")
    method, urltail = match.group(1), match.group(2)
    url = self._base_url + urltail

    # Prepare data: `filename`, `data` and `json` are mutually exclusive
    if filename is not None:
        assert_is_type(filename, str)
        assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
        assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
        assert_satisfies(method, method == "POST", "File uploads can only be done via POST method, got %s" % method)
    elif data is not None:
        assert_is_type(data, dict)
        assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
    elif json is not None:
        assert_is_type(json, dict)

    data = self._prepare_data_payload(data)
    files = self._prepare_file_payload(filename)
    params = None
    if method == "GET" and data:
        # GET requests carry their payload in the query string
        params, data = data, None

    # Make the request
    start_time = time.time()
    try:
        self._log_start_transaction(endpoint, data, json, files, params)
        user_agent = "H2O Python client/" + sys.version.replace("\n", "")
        headers = {"User-Agent": user_agent, "X-Cluster": self._cluster_name}
        resp = requests.request(method=method, url=url, data=data, json=json, files=files, params=params,
                                headers=headers, timeout=self._timeout, auth=self._auth,
                                verify=self._verify_ssl_cert, proxies=self._proxies)
        self._log_end_transaction(start_time, resp)
        return self._process_response(resp)

    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
        # If we own the server process and it is no longer running, say so explicitly.
        if self._local_server and not self._local_server.is_running():
            self._log_end_exception("Local server has died.")
            raise H2OConnectionError("Local server has died unexpectedly. RIP.")
        self._log_end_exception(e)
        raise H2OConnectionError("Unexpected HTTP error: %s" % e)

    except requests.exceptions.Timeout as e:
        self._log_end_exception(e)
        elapsed_time = time.time() - start_time
        raise H2OConnectionError("Timeout after %.3fs" % elapsed_time)

    except H2OResponseError as e:
        # Attach the request details to the error payload before letting the exception propagate.
        err = e.args[0]
        err.endpoint = endpoint
        err.payload = (data, json, files, params)
        raise
def request(self, endpoint, data=None, json=None, filename=None, save_to=None):
    """
    Send a single REST API request to the backend H2O server.

    :param endpoint: (str) HTTP method and path separated by one space, for example "GET /4/schemas/KeyV4".
    :param data: payload for POST (and sometimes GET) requests: a dictionary of simple key/value pairs
        (values can also be arrays), sent in x-www-form-encoded format. For GET and DELETE requests the
        payload is sent as query parameters instead.
    :param json: alternative payload that is sent as a JSON body. Cannot be used together with `data`.
    :param filename: file to upload to the server (POST only). Cannot be used with `data` or `json`.
    :param save_to: if provided, the response will be written to that file (additionally, the response will
        be streamed, so large files can be downloaded seamlessly). This parameter can be either a file name,
        or a folder name. If the folder doesn't exist, it will be created automatically. A function is also
        accepted: it is called with the raw response and its result is used as the target.

    :returns: an H2OResponse object representing the server's response (unless ``save_to`` parameter is
        provided, in which case the output file's name will be returned).
    :raises H2OConnectionError: if the H2O server cannot be reached (or connection is not initialized).
    :raises H2OServerError: if there was a server error (http 500), or server returned malformed JSON.
    :raises H2OResponseError: if the server returned an H2OErrorV3 response (e.g. if the parameters were
        invalid).
    """
    # A connection at stage 0 was never opened, and at stage -1 it has already been closed.
    if self._stage == 0:
        raise H2OConnectionError("Connection not initialized; run .connect() first.")
    if self._stage == -1:
        raise H2OConnectionError("Connection was closed, and can no longer be used.")

    # Prepare URL
    assert_is_type(endpoint, str)
    match = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD|TRACE) (/.*)$")
    method, urltail = match.group(1), match.group(2)
    url = self._base_url + urltail

    # Prepare data: `filename`, `data` and `json` are mutually exclusive
    if filename is not None:
        assert_is_type(filename, str)
        assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
        assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
        assert_satisfies(method, method == "POST", "File uploads can only be done via POST method, got %s" % method)
    elif data is not None:
        assert_is_type(data, dict)
        assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
    elif json is not None:
        assert_is_type(json, dict)

    if filename is None:
        request_data = self._prepare_data_payload(data)
    else:
        request_data = self._prepare_file_payload(filename)

    params = None
    if method in ("GET", "DELETE") and data:
        # These methods carry their payload in the query string
        params, request_data = request_data, None

    if save_to is not None:
        assert_is_type(save_to, str, types.FunctionType)
    # Stream the body only when it is going to be saved to disk
    stream = save_to is not None

    # A list of cookies is collapsed into a single ";"-separated string (done once, result is kept)
    if isinstance(self._cookies, list):
        self._cookies = ";".join(self._cookies)

    # Make the request
    start_time = time.time()
    try:
        self._log_start_transaction(endpoint, request_data, json, filename, params)
        args = self._request_args()
        resp = requests.request(method=method, url=url, data=request_data, json=json, params=params,
                                stream=stream, **args)
        if isinstance(save_to, types.FunctionType):
            save_to = save_to(resp)
        self._log_end_transaction(start_time, resp)
        return self._process_response(resp, save_to)

    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
        # If we own the server process and it is no longer running, say so explicitly.
        if self._local_server and not self._local_server.is_running():
            self._log_end_exception("Local server has died.")
            raise H2OConnectionError("Local server has died unexpectedly. RIP.")
        self._log_end_exception(e)
        raise H2OConnectionError("Unexpected HTTP error: %s" % e)

    except requests.exceptions.Timeout as e:
        self._log_end_exception(e)
        elapsed_time = time.time() - start_time
        raise H2OConnectionError("Timeout after %.3fs" % elapsed_time)

    except H2OResponseError as e:
        # Attach the request details to the error payload (when it is an H2OErrorV3) and re-raise.
        err = e.args[0]
        if isinstance(err, H2OErrorV3):
            err.endpoint = endpoint
            err.payload = (request_data, json, filename, params)
        raise
def open(server=None, url=None, ip=None, port=None, name=None, https=None, auth=None,
         verify_ssl_certificates=True, cacert=None, proxy=None, cookies=None, verbose=True,
         msgs=None, strict_version_check=True):
    r"""
    Establish connection to an existing H2O server.

    The connection is not kept alive, so what this method actually does is attempt to connect to the
    specified server, and check that the server is healthy and responds to REST API requests. If the H2O
    server cannot be reached, an :class:`H2OConnectionError` will be raised. On a success, this method
    returns a new :class:`H2OConnection` object, and it is the only "official" way to create instances of
    this class.

    There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

        * pass a ``server`` option,
        * pass the full ``url`` for the connection,
        * provide a triple of parameters ``ip``, ``port``, ``https``.

    :param H2OLocalServer server: connect to the specified local server instance. There is a slight
        difference between connecting to a local server by specifying its ip and address, and connecting
        through an H2OLocalServer instance: if the server becomes unresponsive, then having access to its
        process handle will allow us to query the server status through OS, and potentially provide
        snapshot of the server's error log in the exception information.
    :param url: full url of the server to connect to.
    :param ip: target server's IP address or hostname (default "localhost").
    :param port: H2O server's port (default 54321).
    :param name: H2O cluster name.
    :param https: if True then connect using https instead of http (default False).
    :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True).
        This setting should rarely be disabled, as it makes your connection vulnerable to
        man-in-the-middle attacks. When used, it will generate a warning from the requests library. Has no
        effect when ``https`` is False.
    :param cacert: Path to a CA bundle file or a directory with certificates of trusted CAs (optional).
    :param auth: authentication token for connecting to the remote server. This can be either a
        (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the
        documentation in the ``requests.auth`` module.
    :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
        will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables).
        We check for the presence of these variables and issue a warning if they are found. In order to
        suppress that warning and use proxy from the environment, pass ``proxy="(default)"``.
    :param cookies: Cookie (or list of) to add to requests.
    :param verbose: if True, then connection progress info will be printed to the stdout.
    :param strict_version_check: If True, an error will be raised if the client and server versions don't
        match.
    :param msgs: custom messages to display during connection. This is a tuple
        (initial message, success message, failure message).

    :returns: A new :class:`H2OConnection` instance.
    :raises H2OConnectionError: if the server cannot be reached.
    :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable
        error, the client itself should decide whether it wants to retry or not).
    """
    # Only the `url` form can carry a context path; the other two forms leave it empty.
    context_path = ''
    if server is not None:
        assert_is_type(server, H2OLocalServer)
        assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
        assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
        assert_is_type(name, None, "`name` should be None when `server` parameter is supplied")
        if not server.is_running():
            raise H2OConnectionError("Unable to connect to server because it is not running")
        ip = server.ip
        port = server.port
        scheme = server.scheme
    elif url is not None:
        assert_is_type(url, str)
        assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
        assert_is_type(name, str, None)
        # We don't allow any Unicode characters in the URL. Maybe some day we will...
        match = assert_matches(url, H2OConnection.url_pattern)
        scheme = match.group(1)
        ip = match.group(2)
        port = int(match.group(3))
        path_group = match.group(4)
        if path_group is not None:
            context_path = "%s" % path_group
    else:
        # Fill in the defaults for the (ip, port, https) triple, then validate it.
        if ip is None:
            ip = str("localhost")
        if port is None:
            port = 54321
        if https is None:
            https = False
        if is_type(port, str) and port.isdigit():
            port = int(port)
        assert_is_type(ip, str)
        assert_is_type(port, int)
        assert_is_type(name, str, None)
        assert_is_type(https, bool)
        assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
        assert_satisfies(port, 1 <= port <= 65535)
        scheme = "https" if https else "http"

    if verify_ssl_certificates is None:
        verify_ssl_certificates = True
    assert_is_type(verify_ssl_certificates, bool)
    assert_is_type(cacert, str, None)
    assert_is_type(proxy, str, None)
    assert_is_type(auth, AuthBase, (str, str), None)
    assert_is_type(cookies, str, [str], None)
    assert_is_type(msgs, None, (str, str, str))

    conn = H2OConnection()
    conn._verbose = bool(verbose)
    conn._local_server = server
    conn._base_url = "%s://%s:%d%s" % (scheme, ip, port, context_path)
    conn._name = server.name if server else name
    conn._verify_ssl_cert = bool(verify_ssl_certificates)
    conn._cacert = cacert
    conn._auth = auth
    conn._cookies = cookies
    conn._proxies = None
    if proxy and proxy != "(default)":
        conn._proxies = {scheme: proxy}
    elif not proxy:
        # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
        # To suppress the warning pass proxy = "(default)".
        proxy_var = scheme + "_proxy"
        for env_var in os.environ:
            if env_var.lower() == proxy_var:
                warn("Proxy is defined in the environment: %s. "
                     "This may interfere with your H2O Connection." % env_var)
        if "localhost" in conn.ip() or "127.0.0.1" in conn.ip():
            # Empty list will cause requests library to respect the default behavior.
            # Thus a non-existing proxy is inserted.
            conn._proxies = {
                "http": None,
                "https": None,
            }

    try:
        # Stage 1 lets .request() through while the connection is being probed.
        retries = 20 if server else 5
        conn._stage = 1
        conn._timeout = 3.0
        conn._cluster = conn._test_connection(retries, messages=msgs)
        # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
        # setting for now, for no good reason other than to ignore all those bugs :(
        conn._timeout = None

        # create a weakref to prevent the atexit callback from keeping hard ref
        # to the connection even after manual close.
        conn_ref = ref(conn)

        def exit_close():
            con = conn_ref()
            if con and con.connected:
                print("Closing connection %s at exit" % con.session_id)
                con.close()

        atexit.register(exit_close)
    except Exception:
        # Put the connection back to stage 0 so that it is known not to be initialized properly.
        conn._stage = 0
        raise

    conn._cluster.check_version(strict=strict_version_check)
    return conn
def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
          ice_root=None, log_dir=None, log_level=None, max_log_file_size=None, port="54321+", name=None,
          extra_classpath=None, verbose=True, jvm_custom_args=None, bind_to_localhost=True):
    """
    Start new H2O server on the local machine.

    :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
        locations returned by `._jar_paths()`.
    :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
        -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
    :param enable_assertions: If True, pass `-ea` option to the JVM.
    :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
    :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
    :param log_dir: Directory for H2O logs to be stored if a new instance is started. Default directory is
        determined by H2O internally.
    :param log_level: The logger level for H2O if a new instance is started. One of "TRACE", "DEBUG",
        "INFO", "WARN", "ERRR", "FATA" (or None).
    :param max_log_file_size: Maximum size of INFO and DEBUG log files. The file is rolled over after a
        specified size has been reached. (The default is 3MB. Minimum is 1MB and maximum is 99999MB)
    :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
        tempfile.mkdtemp().
    :param port: Port where to start the new server. This could be either an integer, or a string of the form
        "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
        ``None`` is treated as "54321+".
    :param name: name of the h2o cluster to be started
    :param extra_classpath: List of paths to libraries that should be included on the Java classpath.
    :param verbose: If True, then connection info will be printed to the stdout.
    :param jvm_custom_args: Custom, user-defined arguments for the JVM H2O is instantiated in
    :param bind_to_localhost: A flag indicating whether access to the H2O instance should be restricted to
        the local machine (default) or if it can be reached from other computers on the network.
        Only applicable when H2O is started from the Python client.

    :returns: a new H2OLocalServer instance
    :raises H2OValueError: if ``min_mem_size`` exceeds ``max_mem_size``, or if ``port`` is a string that is
        neither all digits nor digits followed by "+".
    """
    assert_is_type(jar_path, None, str)
    assert_is_type(port, None, int, str)
    assert_is_type(name, None, str)
    assert_is_type(nthreads, -1, BoundInt(1, 4096))
    assert_is_type(enable_assertions, bool)
    assert_is_type(min_mem_size, None, int)
    assert_is_type(max_mem_size, None, BoundInt(1 << 25))
    assert_is_type(log_dir, str, None)
    assert_is_type(log_level, str, None)
    assert_satisfies(log_level, log_level in [None, "TRACE", "DEBUG", "INFO", "WARN", "ERRR", "FATA"])
    assert_is_type(max_log_file_size, str, None)
    assert_is_type(ice_root, None, I(str, os.path.isdir))
    assert_is_type(extra_classpath, None, [str])
    assert_is_type(jvm_custom_args, list, None)
    assert_is_type(bind_to_localhost, bool)
    if jar_path:
        assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

    if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
        raise H2OValueError("`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
    if port is None:
        port = "54321+"
    baseport = None
    # TODO: get rid of this port gimmick and have 2 separate parameters.
    if is_type(port, str):
        if port.isdigit():
            port = int(port)
        else:
            # `endswith` (rather than `port[-1]`) keeps an empty string from raising IndexError, so that
            # every malformed value is reported through the same H2OValueError.
            if not (port.endswith("+") and port[:-1].isdigit()):
                raise H2OValueError("`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
            baseport = int(port[:-1])
            port = 0  # an exact port of 0 together with a baseport requests the "search upwards" mode

    hs = H2OLocalServer()
    hs._verbose = bool(verbose)
    hs._jar_path = hs._find_jar(jar_path)
    hs._extra_classpath = extra_classpath
    hs._ice_root = ice_root
    hs._name = name
    if not ice_root:
        # No directory supplied: create a temporary one and remember it on the server object.
        hs._ice_root = tempfile.mkdtemp()
        hs._tempdir = hs._ice_root

    if verbose:
        print("Attempting to start a local H2O server...")
    hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                      mmax=max_mem_size, mmin=min_mem_size, jvm_custom_args=jvm_custom_args,
                      bind_to_localhost=bind_to_localhost, log_dir=log_dir, log_level=log_level,
                      max_log_file_size=max_log_file_size)
    if verbose:
        print(" Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
    # Make sure the launched server is shut down when the interpreter exits.
    atexit.register(lambda: hs.shutdown())
    return hs
def open(server=None, url=None, ip=None, port=None, https=None, auth=None, verify_ssl_certificates=True,
         proxy=None, cluster_name=None, verbose=True):
    r"""
    Establish connection to an existing H2O server.

    The connection is not kept alive, so what this method actually does is it attempts to connect to the
    specified server, and checks that the server is healthy and responds to REST API requests. If the H2O
    server cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns
    a new :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

    There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

        * pass a ``server`` option,
        * pass the full ``url`` for the connection,
        * provide a triple of parameters ``ip``, ``port``, ``https``.

    :param H2OLocalServer server: connect to the specified local server instance. There is a slight
        difference between connecting to a local server by specifying its ip and address, and connecting
        through an H2OLocalServer instance: if the server becomes unresponsive, then having access to its
        process handle will allow us to query the server status through OS, and potentially provide
        snapshot of the server's error log in the exception information.
    :param url: full url of the server to connect to.
    :param ip: target server's IP address or hostname (default "localhost").
    :param port: H2O server's port (default 54321).
    :param https: if True then connect using https instead of http (default False).
    :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True).
        This setting should rarely be disabled, as it makes your connection vulnerable to
        man-in-the-middle attacks. When used, it will generate a warning from the requests library. Has no
        effect when ``https`` is False.
    :param auth: authentication token for connecting to the remote server. This can be either a
        (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the
        documentation in the ``requests.auth`` module.
    :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
        will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables).
        We check for the presence of these variables and issue a warning if they are found. In order to
        suppress that warning and use proxy from the environment, pass ``proxy="(default)"``.
    :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
    :param verbose: if True, then connection progress info will be printed to the stdout.

    :returns: A new :class:`H2OConnection` instance.
    :raises H2OConnectionError: if the server cannot be reached.
    :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable
        error, the client itself should decide whether it wants to retry or not).
    """
    if server is not None:
        assert_is_type(server, H2OLocalServer)
        assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
        # The message used to name `ip` here, which pointed the user at the wrong argument.
        assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
        if not server.is_running():
            raise H2OConnectionError("Unable to connect to server because it is not running")
        ip = server.ip
        port = server.port
        scheme = server.scheme
    elif url is not None:
        assert_is_type(url, str)
        assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
        # We don't allow any Unicode characters in the URL. Maybe some day we will...
        match = assert_matches(url, r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$")
        scheme = match.group(1)
        ip = match.group(2)
        port = int(match.group(3))
    else:
        # Fill in the defaults for the (ip, port, https) triple, then validate it.
        if ip is None:
            ip = str("localhost")
        if port is None:
            port = 54321
        if https is None:
            https = False
        if is_str(port) and port.isdigit():
            port = int(port)
        assert_is_type(ip, str)
        assert_is_type(port, int)
        assert_is_type(https, bool)
        assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
        assert_satisfies(port, 1 <= port <= 65535)
        scheme = "https" if https else "http"

    if verify_ssl_certificates is None:
        verify_ssl_certificates = True
    assert_is_type(verify_ssl_certificates, bool)
    assert_is_type(proxy, str, None)
    assert_is_type(auth, AuthBase, (str, str), None)
    assert_is_type(cluster_name, str, None)

    conn = H2OConnection()
    conn._verbose = bool(verbose)
    conn._local_server = server
    conn._base_url = "%s://%s:%d" % (scheme, ip, port)
    conn._verify_ssl_cert = bool(verify_ssl_certificates)
    conn._auth = auth
    conn._cluster_name = cluster_name
    conn._proxies = None
    if proxy and proxy != "(default)":
        conn._proxies = {scheme: proxy}
    elif not proxy:
        # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
        # To suppress the warning pass proxy = "(default)".
        for name in os.environ:
            if name.lower() == scheme + "_proxy":
                warn("Proxy is defined in the environment: %s. "
                     "This may interfere with your H2O Connection." % os.environ[name])

    try:
        # Move to stage 1, otherwise .request() will complain that the connection is not initialized
        retries = 20 if server else 5
        conn._stage = 1
        conn._timeout = 3.0
        conn._cluster_info = conn._test_connection(retries)
        # If a server is unable to respond within 1s, it should be considered a bug. However we disable this
        # setting for now, for no good reason other than to ignore all those bugs :(
        conn._timeout = None
        atexit.register(lambda: conn.close())
    except BaseException:
        # Deliberately as broad as a bare `except`: whatever interrupted the probe (including
        # KeyboardInterrupt), put the connection back to stage 0 so it is known to be uninitialized.
        conn._stage = 0
        raise
    return conn
def request(self, endpoint, data=None, json=None, filename=None):
    """
    Issue one REST API call against the backend H2O server.

    :param endpoint: (str) The endpoint's URL prefixed with the HTTP method, for example
        "GET /4/schemas/KeyV4"
    :param data: data payload for POST (and sometimes GET) requests. This should be a dictionary of simple
        key/value pairs (values can also be arrays), which will be sent over in x-www-form-encoded format.
        With the GET method the payload travels as query parameters.
    :param json: also data payload, but it will be sent as a JSON body. Cannot be used together with `data`.
    :param filename: file to upload to the server. Cannot be used with `data` or `json`, and requires the
        POST method.

    :returns: an H2OResponse object representing the server's response
    :raises H2OConnectionError: if the H2O server cannot be reached (or connection is not initialized)
    :raises H2OServerError: if there was a server error (http 500), or server returned malformed JSON
    :raises H2OResponseError: if the server returned an H2OErrorV3 response (e.g. if the parameters were invalid)
    """
    # Guard clauses: stage 0 means "never connected", stage -1 means "already closed".
    if self._stage == 0:
        raise H2OConnectionError("Connection not initialized; run .connect() first.")
    if self._stage == -1:
        raise H2OConnectionError("Connection was closed, and can no longer be used.")

    # Prepare URL
    assert_is_type(endpoint, str)
    match = assert_matches(str(endpoint), r"^(GET|POST|PUT|DELETE|PATCH|HEAD) (/.*)$")
    method = match.group(1)
    url = self._base_url + match.group(2)

    # Prepare data (at most one of `filename`, `data`, `json` may be given)
    if filename is not None:
        assert_is_type(filename, str)
        assert_is_type(json, None, "Argument `json` should be None when `filename` is used.")
        assert_is_type(data, None, "Argument `data` should be None when `filename` is used.")
        assert_satisfies(method, method == "POST", "File uploads can only be done via POST method, got %s" % method)
    elif data is not None:
        assert_is_type(data, dict)
        assert_is_type(json, None, "Argument `json` should be None when `data` is used.")
    elif json is not None:
        assert_is_type(json, dict)

    data = self._prepare_data_payload(data)
    files = self._prepare_file_payload(filename)
    if method == "GET" and data:
        # Move the payload into the query string
        params = data
        data = None
    else:
        params = None

    # Make the request
    start_time = time.time()
    try:
        self._log_start_transaction(endpoint, data, json, files, params)
        headers = {
            "User-Agent": "H2O Python client/" + sys.version.replace("\n", ""),
            "X-Cluster": self._cluster_name,
        }
        resp = requests.request(method=method, url=url, data=data, json=json, files=files, params=params,
                                headers=headers, timeout=self._timeout, auth=self._auth,
                                verify=self._verify_ssl_cert, proxies=self._proxies)
        self._log_end_transaction(start_time, resp)
        return self._process_response(resp)

    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:
        server_is_dead = self._local_server and not self._local_server.is_running()
        if server_is_dead:
            self._log_end_exception("Local server has died.")
            raise H2OConnectionError("Local server has died unexpectedly. RIP.")
        self._log_end_exception(e)
        raise H2OConnectionError("Unexpected HTTP error: %s" % e)

    except requests.exceptions.Timeout as e:
        self._log_end_exception(e)
        elapsed_time = time.time() - start_time
        raise H2OConnectionError("Timeout after %.3fs" % elapsed_time)

    except H2OResponseError as e:
        # Record which request failed on the error payload, then re-raise unchanged.
        err = e.args[0]
        err.endpoint = endpoint
        err.payload = (data, json, files, params)
        raise
def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics and/or thresholds.

    One matrix is produced for every entry of ``thresholds`` (in the given order), followed by one matrix
    for every entry of ``metrics``, taken at the threshold returned by ``find_threshold_by_max_metric``
    for that metric. When both arguments are None, the "f1" metric is used.

    :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision",
        "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}
        (compared case-insensitively).
    :param thresholds: A value (or list of values) between 0 and 1. The list is not modified.

    :return: a list of ConfusionMatrix objects (if there are more than one to return), or a single
        ConfusionMatrix (if there is only one)
    :raises ValueError: if any of the requested metrics is not in the allowed set.
    """
    # make lists out of metrics and thresholds arguments
    if metrics is None and thresholds is None:
        metrics = ["f1"]

    if isinstance(metrics, list):
        metrics_list = metrics
    elif metrics is None:
        metrics_list = []
    else:
        metrics_list = [metrics]

    if isinstance(thresholds, list):
        # Work on a copy so that the caller's list is never mutated.
        thresholds_list = list(thresholds)
    elif thresholds is None:
        thresholds_list = []
    else:
        thresholds_list = [thresholds]

    # error check the metrics_list and thresholds_list
    assert_is_type(thresholds_list, [numeric])
    assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

    allowed_metrics = ("min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity",
                       "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy")
    if not all(m.lower() in allowed_metrics for m in metrics_list):
        raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

    # Pair each threshold with the metric it was derived from (None for explicitly requested thresholds).
    # Keeping the pairing explicit, instead of looking the threshold value up afterwards, gives the right
    # header even when two metrics peak at the same threshold or a requested threshold coincides with one.
    labelled_thresholds = [(t, None) for t in thresholds_list]
    labelled_thresholds.extend((self.find_threshold_by_max_metric(m), m) for m in metrics_list)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    cms = []
    for t, m in labelled_thresholds:
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        # Column 0 holds the threshold actually present in the table; columns 11-14 hold the counts.
        actual_threshold = float(row[0])
        tns = row[11]
        fns = row[12]
        fps = row[13]
        tps = row[14]
        p = tps + fns
        n = tns + fps
        c0 = n - fps
        c1 = p - tps
        if m is not None:
            table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(actual_threshold)
        else:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_threshold)
        cms.append(ConfusionMatrix(cm=[[c0, fps], [c1, tps]], domains=self._metric_json['domain'],
                                   table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    return cms
def test_asserts():
    """
    Test type-checking functionality.

    Exercises ``assert_is_type`` (accepting and rejecting cases), ``assert_matches`` and ``assert_satisfies``.
    """
    def assert_error(*args, **kwargs):
        """Check that assert_is_type() with given arguments throws an error."""
        try:
            assert_is_type(*args, **kwargs)
        except H2OTypeError as e:
            # Check whether the message can stringify properly
            message = str(e)
            assert len(message) < 1000
        else:
            raise RuntimeError("Failed to throw an exception")

    class A(object):
        pass

    class B(A):
        pass

    class C(A):
        pass

    class D(B, C):
        pass

    # --- values that must pass the type check ---
    assert_is_type(3, int)
    assert_is_type(2**100, int)
    assert_is_type("3", str)
    assert_is_type(u"3", str)
    assert_is_type("foo", u"foo")
    assert_is_type(u"foo", "foo")
    assert_is_type("I", *list("ABCDEFGHIJKL"))
    assert_is_type(False, bool)
    assert_is_type(43, str, bool, int)
    assert_is_type(4 / 3, int, float)
    assert_is_type(None, None)
    assert_is_type(None, A, str, None)
    assert_is_type([], [float])
    assert_is_type([1, 4, 5], [int])
    assert_is_type([1.0, 2, 5], [int, float])
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]], [[int, float]])
    assert_is_type([1, None, 2], [int, float, None])
    assert_is_type({1, 5, 1, 1, 3}, {int})
    assert_is_type({1, "hello", 3}, {int, str})
    assert_is_type({"foo": 1, "bar": 2}, {str: int})
    assert_is_type({"foo": 3, "bar": [5], "baz": None}, {str: U(int, None, [int])})
    assert_is_type({"foo": 1, "bar": 2}, {"foo": int, "bar": U(int, float, None), "baz": bool})
    assert_is_type((1, 3), (int, int))
    assert_is_type(("a", "b", "c"), (int, int, int), (str, str, str))
    assert_is_type([1, [2], [{3}]], [int, [int], [{3}]])
    assert_is_type(A(), None, A)
    assert_is_type(B(), None, A)
    assert_is_type(C(), A, B)
    assert_is_type(D(), I(A, B, C))
    assert_is_type(A, type)
    for a in range(-2, 5):
        assert_is_type(a, -2, -1, 0, 1, 2, 3, 4)
    assert_is_type(1, numeric)
    assert_is_type(2.2, numeric)
    assert_is_type(1, I(numeric, object))

    # --- values that must be rejected ---
    assert_error(3, str)
    assert_error("Z", *list("ABCDEFGHIJKL"))
    assert_error(u"Z", "a", "...", "z")
    assert_error("X", u"x")
    assert_error(0, bool)
    assert_error(0, float, str, bool, None)
    assert_error([1, 5], [float])
    assert_error((1, 3), (int, str), (str, int), (float, float))
    assert_error(A(), None, B)
    assert_error(A, A)
    assert_error({"foo": 1, "bar": "2"}, {"foo": int, "bar": U(int, float, None)})
    assert_error(3, 0, 2, 4)
    assert_error(None, numeric)
    assert_error("sss", numeric)
    assert_error(B(), I(A, B, C))
    assert_error(2, I(int, str))

    # --- assert_matches returns a match object whose groups can be inspected ---
    url_regex = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$"
    assert_matches("Hello, world!", r"^(\w+), (\w*)!$")
    assert_matches("http://127.0.0.1:3233/", url_regex)
    m = assert_matches("https://localhost:54321", url_regex)
    assert m.group(1) == "https"
    assert m.group(2) == "localhost"
    assert m.group(3) == "54321"

    # --- assert_satisfies ---
    x = 5
    assert_satisfies(x, x < 1000)
    assert_satisfies(x, x**x > 1000)
    assert_satisfies(url_regex, url_regex.lower() == url_regex)
    try:
        assert_satisfies(url_regex, url_regex.upper() == url_regex)
    except H2OValueError as e:
        assert "url_regex.upper() == url_regex" in str(e), "Error message is bad: " + str(e)
    else:
        # Without this branch the test would pass silently if the false condition were accepted.
        raise AssertionError("assert_satisfies() failed to throw an exception")
def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics and/or thresholds.

    :param metrics: A string (or list of strings) among metrics listed in :const:`max_metrics`. Defaults to 'f1'
        when neither ``metrics`` nor ``thresholds`` is given. The membership check is case-insensitive.
    :param thresholds: A value (or list of values) between 0 and 1. A list passed here is not modified.
    :returns: a list of ConfusionMatrix objects (if there are more than one to return), or a single
        ConfusionMatrix (if there is only one). Matrices for the explicit ``thresholds`` come first, followed
        by one matrix per entry of ``metrics``.
    :raises ValueError: if any of the ``metrics`` is not listed in :const:`max_metrics`.
    """
    # make lists out of metrics and thresholds arguments
    if metrics is None and thresholds is None:
        metrics = ['f1']

    if isinstance(metrics, list):
        metrics_list = metrics
    elif metrics is None:
        metrics_list = []
    else:
        metrics_list = [metrics]

    if isinstance(thresholds, list):
        # Copy: the metric-derived thresholds are appended below and must not leak into the caller's list.
        thresholds_list = list(thresholds)
    elif thresholds is None:
        thresholds_list = []
    else:
        thresholds_list = [thresholds]

    # error check the metrics_list and thresholds_list
    assert_is_type(thresholds_list, [numeric])
    assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

    if not all(m.lower() in H2OBinomialModelMetrics.max_metrics for m in metrics_list):
        # The list must go through .format(); passing it as a second ValueError argument leaves "{}" unfilled.
        raise ValueError("The only allowable metrics are {}".format(', '.join(H2OBinomialModelMetrics.max_metrics)))

    # make one big list that combines the thresholds and metric-thresholds
    metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
    first_metrics_thresholds_offset = len(thresholds_list)
    thresholds_list.extend(metrics_thresholds)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    cms = []
    for i, t in enumerate(thresholds_list):
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        # Column 0 is read as the threshold actually stored in the table; columns 11..14 are read as the
        # tns / fns / fps / tps counts.
        actual_threshold = float(row[0])
        tns = row[11]
        fns = row[12]
        fps = row[13]
        tps = row[14]
        # Decide by position: a value test (`t in metrics_thresholds`) would also fire for an explicit
        # threshold that happens to equal a metric's one, producing a negative (wrapping) metric index.
        if i >= first_metrics_thresholds_offset:
            m = metrics_list[i - first_metrics_thresholds_offset]
            table_header = "Confusion Matrix (Act/Pred) for max {} @ threshold = {}".format(m, actual_threshold)
        else:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = {}".format(actual_threshold)
        cms.append(ConfusionMatrix(cm=[[tns, fps], [fns, tps]], domains=self._metric_json['domain'],
                                   table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    return cms
def test_asserts():
    """
    Test type-checking functionality.

    Exercises ``assert_is_type`` (accepting and rejecting cases), ``assert_matches`` and ``assert_satisfies``.
    """
    def assert_error(*args, **kwargs):
        """Check that assert_is_type() with given arguments throws an error."""
        try:
            assert_is_type(*args, **kwargs)
        except H2OTypeError as e:
            # Check whether the message can stringify properly
            message = str(e)
            assert len(message) < 1000
        else:
            raise RuntimeError("Failed to throw an exception")

    class A(object):
        pass

    class B(A):
        pass

    class C(A):
        pass

    class D(B, C):
        pass

    # --- values that must pass the type check ---
    assert_is_type(3, int)
    assert_is_type(2**100, int)
    assert_is_type("3", str)
    assert_is_type(u"3", str)
    assert_is_type("foo", u"foo")
    assert_is_type(u"foo", "foo")
    assert_is_type("I", *list("ABCDEFGHIJKL"))
    assert_is_type(False, bool)
    assert_is_type(43, str, bool, int)
    assert_is_type(4 / 3, int, float)
    assert_is_type(None, None)
    assert_is_type(None, A, str, None)
    assert_is_type([], [float])
    assert_is_type([1, 4, 5], [int])
    assert_is_type([1.0, 2, 5], [int, float])
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]], [[int, float]])
    assert_is_type([1, None, 2], [int, float, None])
    assert_is_type({1, 5, 1, 1, 3}, {int})
    assert_is_type({1, "hello", 3}, {int, str})
    assert_is_type({"foo": 1, "bar": 2}, {str: int})
    assert_is_type({"foo": 3, "bar": [5], "baz": None}, {str: U(int, None, [int])})
    assert_is_type({"foo": 1, "bar": 2}, {"foo": int, "bar": U(int, float, None), "baz": bool})
    assert_is_type((1, 3), (int, int))
    assert_is_type(("a", "b", "c"), (int, int, int), (str, str, str))
    assert_is_type([1, [2], [{3}]], [int, [int], [{3}]])
    assert_is_type(A(), None, A)
    assert_is_type(B(), None, A)
    assert_is_type(C(), A, B)
    assert_is_type(D(), I(A, B, C))
    assert_is_type(A, type)
    for a in range(-2, 5):
        assert_is_type(a, -2, -1, 0, 1, 2, 3, 4)
    assert_is_type(1, numeric)
    assert_is_type(2.2, numeric)
    assert_is_type(1, I(numeric, object))

    # --- values that must be rejected ---
    assert_error(3, str)
    assert_error("Z", *list("ABCDEFGHIJKL"))
    assert_error(u"Z", "a", "...", "z")
    assert_error("X", u"x")
    assert_error(0, bool)
    assert_error(0, float, str, bool, None)
    assert_error([1, 5], [float])
    assert_error((1, 3), (int, str), (str, int), (float, float))
    assert_error(A(), None, B)
    assert_error(A, A)
    assert_error({"foo": 1, "bar": "2"}, {"foo": int, "bar": U(int, float, None)})
    assert_error(3, 0, 2, 4)
    assert_error(None, numeric)
    assert_error("sss", numeric)
    assert_error(B(), I(A, B, C))
    assert_error(2, I(int, str))

    # --- assert_matches returns a match object whose groups can be inspected ---
    url_regex = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$"
    assert_matches("Hello, world!", r"^(\w+), (\w*)!$")
    assert_matches("http://127.0.0.1:3233/", url_regex)
    m = assert_matches("https://localhost:54321", url_regex)
    assert m.group(1) == "https"
    assert m.group(2) == "localhost"
    assert m.group(3) == "54321"

    # --- assert_satisfies ---
    x = 5
    assert_satisfies(x, x < 1000)
    assert_satisfies(x, x ** x > 1000)
    assert_satisfies(url_regex, url_regex.lower() == url_regex)
    try:
        assert_satisfies(url_regex, url_regex.upper() == url_regex)
    except H2OValueError as e:
        assert "url_regex.upper() == url_regex" in str(e), "Error message is bad: " + str(e)
    else:
        # Without this branch the test would pass silently if the false condition were accepted.
        raise AssertionError("assert_satisfies() failed to throw an exception")
def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
          ice_root=None, port="54321+", extra_classpath=None, verbose=True):
    """
    Start new H2O server on the local machine.

    :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
        locations returned by `._jar_paths()`.
    :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
        -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
    :param enable_assertions: If True, pass `-ea` option to the JVM.
    :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
    :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
    :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
        tempfile.mkdtemp().
    :param port: Port where to start the new server. This could be either an integer, or a string of the form
        "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
    :param extra_classpath: List of paths to libraries that should be included on the Java classpath.
    :param verbose: If True, then connection info will be printed to the stdout.

    :returns: a new H2OLocalServer instance
    :raises H2OValueError: if ``min_mem_size`` exceeds ``max_mem_size``, or if ``port`` is a string that is
        neither all digits nor of the form "DDDD+".
    """
    assert_is_type(jar_path, None, str)
    assert_is_type(port, None, int, str)
    assert_is_type(nthreads, -1, BoundInt(1, 4096))
    assert_is_type(enable_assertions, bool)
    assert_is_type(min_mem_size, None, int)
    assert_is_type(max_mem_size, None, BoundInt(1 << 25))
    assert_is_type(ice_root, None, I(str, os.path.isdir))
    assert_is_type(extra_classpath, None, [str])
    if jar_path:
        assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

    if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
        raise H2OValueError(
            "`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
    if port is None:
        port = "54321+"
    baseport = None
    # TODO: get rid of this port gimmick and have 2 separate parameters.
    if is_type(port, str):
        if port.isdigit():
            port = int(port)
        else:
            # endswith() instead of port[-1]: an empty string must be reported as a bad value, not crash
            # with an IndexError.
            if not (port.endswith("+") and port[:-1].isdigit()):
                raise H2OValueError(
                    "`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
            # "DDDD+" form: pass the starting port as `baseport` and 0 as the fixed port.
            baseport = int(port[:-1])
            port = 0

    hs = H2OLocalServer()
    hs._verbose = bool(verbose)
    hs._jar_path = hs._find_jar(jar_path)
    hs._extra_classpath = extra_classpath
    hs._ice_root = ice_root
    if not ice_root:
        hs._ice_root = tempfile.mkdtemp()
        # Only a directory created here is recorded as `_tempdir` (presumably so that it, and never a
        # user-supplied directory, can be cleaned up later -- confirm against shutdown()).
        hs._tempdir = hs._ice_root

    if verbose:
        print("Attempting to start a local H2O server...")
    hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                      mmax=max_mem_size, mmin=min_mem_size)
    if verbose:
        print(" Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
    # Make sure the server is shut down when the interpreter exits.
    atexit.register(lambda: hs.shutdown())
    return hs
def test_asserts():
    """
    Test type-checking functionality.

    Exercises ``assert_is_type`` (accepting and rejecting cases, including the error text produced for a lambda
    checker), ``assert_matches`` and ``assert_satisfies``. Checks against pandas / numpy types run only when
    both libraries can be imported.
    """
    def assert_error(*args, **kwargs):
        """Check that assert_is_type() with given arguments throws an error."""
        try:
            assert_is_type(*args, **kwargs)
        except H2OTypeError as exc:
            # Check whether the message can stringify properly
            message = str(exc)
            assert len(message) < 1000
        else:
            raise RuntimeError("Failed to throw an exception")

    class A(object):
        """Dummy A."""

    class B(A):
        """Dummy B."""

    class C(A):
        """Dummy C."""

    class D(B, C):
        """Dummy D."""

    # --- values that must pass the type check ---
    assert_is_type(3, int)
    assert_is_type(2**100, int)
    assert_is_type("3", str)
    assert_is_type(u"3", str)
    assert_is_type("foo", u"foo")
    assert_is_type(u"foo", "foo")
    assert_is_type("I", *list("ABCDEFGHIJKL"))
    assert_is_type(False, bool)
    assert_is_type(43, str, bool, int)
    assert_is_type(4 / 3, int, float)
    assert_is_type(None, None)
    assert_is_type(None, A, str, None)
    assert_is_type([], [float])
    assert_is_type([1, 4, 5], [int])
    assert_is_type([1.0, 2, 5], [int, float])
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]], [[int, float]])
    assert_is_type([1, None, 2], [int, float, None])
    assert_is_type({1, 5, 1, 1, 3}, {int})
    assert_is_type({1, "hello", 3}, {int, str})
    assert_is_type({"foo": 1, "bar": 2}, {str: int})
    assert_is_type({"foo": 3, "bar": [5], "baz": None}, {str: U(int, None, [int])})
    assert_is_type({"foo": 1, "bar": 2}, {"foo": int, "bar": U(int, float, None), "baz": bool})
    assert_is_type({}, {"spam": int, "egg": int})
    assert_is_type({"spam": 10}, {"spam": int, "egg": int})
    assert_is_type({"egg": 1}, {"spam": int, "egg": int})
    assert_is_type({"egg": 1, "spam": 10}, {"spam": int, "egg": int})
    assert_is_type({"egg": 1, "spam": 10}, Dict(egg=int, spam=int))
    assert_is_type({"egg": 1, "spam": 10}, Dict(egg=int, spam=int, ham=U(int, None)))
    assert_is_type((1, 3), (int, int))
    assert_is_type(("a", "b", "c"), (int, int, int), (str, str, str))
    assert_is_type((1, 3, 4, 7, 11, 18), Tuple(int))
    assert_is_type((1, 3, "spam", 3, "egg"), Tuple(int, str))
    assert_is_type([1, [2], [{3}]], [int, [int], [{3}]])
    assert_is_type(A(), None, A)
    assert_is_type(B(), None, A)
    assert_is_type(C(), A, B)
    assert_is_type(D(), I(A, B, C))
    assert_is_type(A, type)
    assert_is_type(B, lambda aa: issubclass(aa, A))
    for a in range(-2, 5):
        assert_is_type(a, -2, -1, 0, 1, 2, 3, 4)
    assert_is_type(1, numeric)
    assert_is_type(2.2, numeric)
    assert_is_type(1, I(numeric, object))
    assert_is_type(34, I(int, NOT(0)))
    assert_is_type(["foo", "egg", "spaam"], [I(str, NOT("spam"))])
    assert_is_type(H2OFrame(), h2oframe)
    assert_is_type([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0, 0]],
                   I([[numeric]], lambda v: all(len(vi) == len(v[0]) for vi in v)))
    assert_is_type([None, None, float('nan'), None, "N/A"], [None, "N/A", I(float, math.isnan)])

    # --- values that must be rejected ---
    assert_error(3, str)
    assert_error(0, float)
    assert_error("Z", *list("ABCDEFGHIJKL"))
    assert_error(u"Z", "a", "...", "z")
    assert_error("X", u"x")
    assert_error(0, bool)
    assert_error(0, float, str, bool, None)
    assert_error([1, 5], [float])
    assert_error((1, 3), (int, str), (str, int), (float, float))
    assert_error(A(), None, B)
    assert_error(A, A)
    assert_error(A, lambda aa: issubclass(aa, B))
    assert_error(135, I(int, lambda x: 0 <= x <= 100))
    assert_error({"foo": 1, "bar": "2"}, {"foo": int, "bar": U(int, float, None)})
    assert_error(3, 0, 2, 4)
    assert_error(None, numeric)
    assert_error("sss", numeric)
    assert_error(B(), I(A, B, C))
    assert_error(2, I(int, str))
    assert_error(0, I(int, NOT(0)))
    assert_error(None, NOT(None))
    assert_error((1, 3, "2", 3), Tuple(int))
    assert_error({"spam": 10}, Dict(spam=int, egg=int))
    assert_error({"egg": 5}, Dict(spam=int, egg=int))
    assert_error(False, h2oframe, pandas_dataframe, numpy_ndarray)
    assert_error([[2.0, 3.1, 0], [2, 4.4, 1.1], [-1, 0]],
                 I([[numeric]], lambda v: all(len(vi) == len(v[0]) for vi in v)))

    try:
        # Cannot use `assert_error` here because typechecks module cannot detect args in (*args, *kwargs)
        assert_is_type(10000000, I(int, lambda port: 1 <= port <= 65535))
        assert False, "Failed to throw an exception"
    except H2OTypeError as e:
        assert "integer & 1 <= port <= 65535" in str(e), "Bad error message: '%s'" % e

    # --- assert_matches returns a match object whose groups can be inspected ---
    url_regex = r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$"
    assert_matches("Hello, world!", r"^(\w+), (\w*)!$")
    assert_matches("http://127.0.0.1:3233/", url_regex)
    m = assert_matches("https://localhost:54321", url_regex)
    assert m.group(1) == "https"
    assert m.group(2) == "localhost"
    assert m.group(3) == "54321"

    # --- assert_satisfies ---
    x = 5
    assert_satisfies(x, x < 1000)
    assert_satisfies(x, x ** x > 1000)
    assert_satisfies(url_regex, url_regex.lower() == url_regex)
    try:
        assert_satisfies(url_regex, url_regex.upper() == url_regex)
    except H2OValueError as e:
        assert "url_regex.upper() == url_regex" in str(e), "Error message is bad: " + str(e)
    else:
        # Without this branch the test would pass silently if the false condition were accepted.
        raise AssertionError("assert_satisfies() failed to throw an exception")

    # --- optional third-party types: skipped when pandas / numpy are not installed ---
    try:
        import pandas
        import numpy
    except ImportError:
        pass
    else:
        # Kept outside the `try` so that only a missing library, not a failing check, is tolerated.
        assert_is_type(pandas.DataFrame(), pandas_dataframe)
        assert_is_type(numpy.ndarray(shape=(5,)), numpy_ndarray)
def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
          ice_root=None, port="54321+", verbose=True):
    """
    Start new H2O server on the local machine.

    :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
        locations returned by `._jar_paths()`.
    :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
        -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
    :param enable_assertions: If True, pass `-ea` option to the JVM.
    :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
    :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
    :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
        tempfile.mkdtemp().
    :param port: Port where to start the new server. This could be either an integer, or a string of the form
        "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
    :param verbose: If True, then connection info will be printed to the stdout.

    :returns: a new H2OLocalServer instance
    :raises AssertionError: if ``nthreads`` is not -1 or within 1..4096, ``max_mem_size`` is below ``1 << 25``,
        ``min_mem_size`` exceeds ``max_mem_size``, ``ice_root`` is not an existing directory, or ``port`` is a
        string that is neither all digits nor of the form "DDDD+".
    """
    assert_is_type(jar_path, None, str)
    assert_is_type(port, None, int, str)
    assert_is_type(nthreads, int)
    assert_is_type(enable_assertions, bool)
    assert_is_type(min_mem_size, None, int)
    assert_is_type(max_mem_size, None, int)
    assert_is_type(ice_root, None, str)
    if jar_path:
        assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

    # The checks below raise AssertionError explicitly instead of using `assert` statements: the exception
    # type seen by callers stays the same, but the validation is no longer dropped when Python runs with -O.
    if not (nthreads == -1 or 1 <= nthreads <= 4096):
        raise AssertionError("`nthreads` is out of bounds: %d" % nthreads)
    if max_mem_size is not None and max_mem_size < (1 << 25):
        raise AssertionError("`max_mem_size` too small: %d" % max_mem_size)
    if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
        raise AssertionError(
            "`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
    if ice_root and not os.path.isdir(ice_root):
        raise AssertionError("`ice_root` is not a valid directory: %s" % ice_root)

    if port is None:
        port = "54321+"
    baseport = None
    if is_type(port, str):
        if port.isdigit():
            port = int(port)
        else:
            # endswith() instead of port[-1]: an empty string must be reported as a bad value, not crash
            # with an IndexError.
            if not (port.endswith("+") and port[:-1].isdigit()):
                raise AssertionError(
                    "`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
            # "DDDD+" form: pass the starting port as `baseport` and 0 as the fixed port.
            baseport = int(port[:-1])
            port = 0

    hs = H2OLocalServer()
    hs._verbose = bool(verbose)
    hs._jar_path = hs._find_jar(jar_path)
    hs._ice_root = ice_root
    if not ice_root:
        hs._ice_root = tempfile.mkdtemp()
        # Only a directory created here is recorded as `_tempdir` (presumably so that it, and never a
        # user-supplied directory, can be cleaned up later -- confirm against shutdown()).
        hs._tempdir = hs._ice_root

    if verbose:
        print("Attempting to start a local H2O server...")
    hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                      mmax=max_mem_size, mmin=min_mem_size)
    if verbose:
        print(" Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
    # Make sure the server is shut down when the interpreter exits.
    atexit.register(lambda: hs.shutdown())
    return hs
def open(server=None, url=None, ip=None, port=None, https=None, auth=None, verify_ssl_certificates=True,
         proxy=None, cluster_name=None, verbose=True, _msgs=None):
    r"""
    Establish connection to an existing H2O server.

    The connection is not kept alive, so what this method actually does is it attempts to connect to the
    specified server, and checks that the server is healthy and responds to REST API requests. If the H2O server
    cannot be reached, an :class:`H2OConnectionError` will be raised. On success this method returns a new
    :class:`H2OConnection` object, and it is the only "official" way to create instances of this class.

    There are 3 ways to specify the target to connect to (these settings are mutually exclusive):

        * pass a ``server`` option,
        * pass the full ``url`` for the connection,
        * provide a triple of parameters ``ip``, ``port``, ``https``.

    :param H2OLocalServer server: connect to the specified local server instance. There is a slight difference
        between connecting to a local server by specifying its ip and port, and connecting through an
        H2OLocalServer instance: if the server becomes unresponsive, then having access to its process handle
        will allow us to query the server status through OS, and potentially provide snapshot of the server's
        error log in the exception information.
    :param url: full url of the server to connect to.
    :param ip: target server's IP address or hostname (default "localhost").
    :param port: H2O server's port (default 54321).
    :param https: if True then connect using https instead of http (default False).
    :param verify_ssl_certificates: if False then SSL certificate checking will be disabled (default True). This
        setting should rarely be disabled, as it makes your connection vulnerable to man-in-the-middle attacks.
        When used, it will generate a warning from the requests library. Has no effect when ``https`` is False.
    :param auth: authentication token for connecting to the remote server. This can be either a
        (username, password) tuple, or an authenticator (AuthBase) object. Please refer to the documentation in
        the ``requests.auth`` module.
    :param proxy: url address of a proxy server. If you do not specify the proxy, then the requests module
        will attempt to use a proxy specified in the environment (in HTTP_PROXY / HTTPS_PROXY variables). We
        check for the presence of these variables and issue a warning if they are found. In order to suppress
        that warning and use proxy from the environment, pass ``proxy="(default)"``.
    :param cluster_name: name of the H2O cluster to connect to. This option is used from Steam only.
    :param verbose: if True, then connection progress info will be printed to the stdout.
    :param _msgs: custom messages to display during connection. This is a tuple (initial message, success
        message, failure message).

    :returns: A new :class:`H2OConnection` instance.
    :raises H2OConnectionError: if the server cannot be reached.
    :raises H2OServerError: if the server is in an unhealthy state (although this might be a recoverable error,
        the client itself should decide whether it wants to retry or not).
    """
    if server is not None:
        assert_is_type(server, H2OLocalServer)
        # NOTE(review): the trailing string is passed positionally, i.e. as an additional accepted literal
        # value rather than as a dedicated message argument; it is presumably meant to surface in the error
        # text -- confirm against assert_is_type(). Kept positional to preserve the existing check.
        assert_is_type(ip, None, "`ip` should be None when `server` parameter is supplied")
        assert_is_type(url, None, "`url` should be None when `server` parameter is supplied")
        if not server.is_running():
            raise H2OConnectionError("Unable to connect to server because it is not running")
        ip = server.ip
        port = server.port
        scheme = server.scheme
    elif url is not None:
        assert_is_type(url, str)
        assert_is_type(ip, None, "`ip` should be None when `url` parameter is supplied")
        # We don't allow any Unicode characters in the URL. Maybe some day we will...
        match = assert_matches(url, r"^(https?)://((?:[\w-]+\.)*[\w-]+):(\d+)/?$")
        scheme = match.group(1)
        ip = match.group(2)
        port = int(match.group(3))
    else:
        # Neither `server` nor `url` given: fall back to the (ip, port, https) triple with its defaults.
        if ip is None:
            ip = str("localhost")
        if port is None:
            port = 54321
        if https is None:
            https = False
        if is_str(port) and port.isdigit():
            port = int(port)
        assert_is_type(ip, str)
        assert_is_type(port, int)
        assert_is_type(https, bool)
        assert_matches(ip, r"(?:[\w-]+\.)*[\w-]+")
        assert_satisfies(port, 1 <= port <= 65535)
        scheme = "https" if https else "http"

    if verify_ssl_certificates is None:
        verify_ssl_certificates = True
    assert_is_type(verify_ssl_certificates, bool)
    assert_is_type(proxy, str, None)
    assert_is_type(auth, AuthBase, (str, str), None)
    assert_is_type(cluster_name, str, None)
    assert_is_type(_msgs, None, (str, str, str))

    conn = H2OConnection()
    conn._verbose = bool(verbose)
    conn._local_server = server
    conn._base_url = "%s://%s:%d" % (scheme, ip, port)
    conn._verify_ssl_cert = bool(verify_ssl_certificates)
    conn._auth = auth
    conn._cluster_name = cluster_name
    conn._proxies = None
    if proxy and proxy != "(default)":
        conn._proxies = {scheme: proxy}
    elif not proxy:
        # Give user a warning if there are any "*_proxy" variables in the environment. [PUBDEV-2504]
        # To suppress the warning pass proxy = "(default)".
        for name in os.environ:
            if name.lower() == scheme + "_proxy":
                warn("Proxy is defined in the environment: %s. "
                     "This may interfere with your H2O Connection." % os.environ[name])

    try:
        # More retries are allowed when connecting through a local server handle.
        retries = 20 if server else 5
        conn._stage = 1
        conn._timeout = 3.0
        conn._cluster_info = conn._test_connection(retries, messages=_msgs)
        # If a server is unable to respond within a few seconds, it should be considered a bug. However we
        # disable this setting for now, for no good reason other than to ignore all those bugs :(
        conn._timeout = None
        # This is a good one! On the surface it registers a callback to be invoked when the script is about
        # to finish, but it also has a side effect in that the reference to current connection will be held
        # by the ``atexit`` service till the end -- which means it will never be garbage-collected.
        atexit.register(lambda: conn.close())
    except Exception:
        # Reset _stage so that we know the connection was not initialized properly.
        conn._stage = 0
        raise
    return conn
def start(jar_path=None, nthreads=-1, enable_assertions=True, max_mem_size=None, min_mem_size=None,
          ice_root=None, log_dir=None, log_level=None, port="54321+", name=None, extra_classpath=None,
          verbose=True, jvm_custom_args=None, bind_to_localhost=True):
    """
    Start new H2O server on the local machine.

    :param jar_path: Path to the h2o.jar executable. If not given, then we will search for h2o.jar in the
        locations returned by `._jar_paths()`.
    :param nthreads: Number of threads in the thread pool. This should be related to the number of CPUs used.
        -1 means use all CPUs on the host. A positive integer specifies the number of CPUs directly.
    :param enable_assertions: If True, pass `-ea` option to the JVM.
    :param max_mem_size: Maximum heap size (jvm option Xmx), in bytes.
    :param min_mem_size: Minimum heap size (jvm option Xms), in bytes.
    :param log_dir: Directory for H2O logs to be stored if a new instance is started. Default directory is
        determined by H2O internally.
    :param log_level: The logger level for H2O if a new instance is started. One of "TRACE", "DEBUG", "INFO",
        "WARN", "ERRR", "FATA", or None.
    :param ice_root: A directory where H2O stores its temporary files. Default location is determined by
        tempfile.mkdtemp().
    :param port: Port where to start the new server. This could be either an integer, or a string of the form
        "DDDDD+", indicating that the server should start looking for an open port starting from DDDDD and up.
    :param name: name of the h2o cluster to be started
    :param extra_classpath: List of paths to libraries that should be included on the Java classpath.
    :param verbose: If True, then connection info will be printed to the stdout.
    :param jvm_custom_args: Custom, user-defined arguments for the JVM H2O is instantiated in
    :param bind_to_localhost: A flag indicating whether access to the H2O instance should be restricted to the
        local machine (default) or if it can be reached from other computers on the network.
        Only applicable when H2O is started from the Python client.

    :returns: a new H2OLocalServer instance
    :raises H2OValueError: if ``min_mem_size`` exceeds ``max_mem_size``, or if ``port`` is a string that is
        neither all digits nor of the form "DDDD+".
    """
    assert_is_type(jar_path, None, str)
    assert_is_type(port, None, int, str)
    assert_is_type(name, None, str)
    assert_is_type(nthreads, -1, BoundInt(1, 4096))
    assert_is_type(enable_assertions, bool)
    assert_is_type(min_mem_size, None, int)
    assert_is_type(max_mem_size, None, BoundInt(1 << 25))
    assert_is_type(log_dir, str, None)
    assert_is_type(log_level, str, None)
    assert_satisfies(log_level, log_level in [None, "TRACE", "DEBUG", "INFO", "WARN", "ERRR", "FATA"])
    assert_is_type(ice_root, None, I(str, os.path.isdir))
    assert_is_type(extra_classpath, None, [str])
    assert_is_type(jvm_custom_args, list, None)
    assert_is_type(bind_to_localhost, bool)
    if jar_path:
        assert_satisfies(jar_path, jar_path.endswith("h2o.jar"))

    if min_mem_size is not None and max_mem_size is not None and min_mem_size > max_mem_size:
        raise H2OValueError("`min_mem_size`=%d is larger than the `max_mem_size`=%d" % (min_mem_size, max_mem_size))
    if port is None:
        port = "54321+"
    baseport = None
    # TODO: get rid of this port gimmick and have 2 separate parameters.
    if is_type(port, str):
        if port.isdigit():
            port = int(port)
        else:
            # endswith() instead of port[-1]: an empty string must be reported as a bad value, not crash
            # with an IndexError.
            if not (port.endswith("+") and port[:-1].isdigit()):
                raise H2OValueError("`port` should be of the form 'DDDD+', where D is a digit. Got: %s" % port)
            # "DDDD+" form: pass the starting port as `baseport` and 0 as the fixed port.
            baseport = int(port[:-1])
            port = 0

    hs = H2OLocalServer()
    hs._verbose = bool(verbose)
    hs._jar_path = hs._find_jar(jar_path)
    hs._extra_classpath = extra_classpath
    hs._ice_root = ice_root
    hs._name = name
    if not ice_root:
        hs._ice_root = tempfile.mkdtemp()
        # Only a directory created here is recorded as `_tempdir` (presumably so that it, and never a
        # user-supplied directory, can be cleaned up later -- confirm against shutdown()).
        hs._tempdir = hs._ice_root

    if verbose:
        print("Attempting to start a local H2O server...")
    hs._launch_server(port=port, baseport=baseport, nthreads=int(nthreads), ea=enable_assertions,
                      mmax=max_mem_size, mmin=min_mem_size, jvm_custom_args=jvm_custom_args,
                      bind_to_localhost=bind_to_localhost, log_dir=log_dir, log_level=log_level)
    if verbose:
        print(" Server is running at %s://%s:%d" % (hs.scheme, hs.ip, hs.port))
    # Make sure the server is shut down when the interpreter exits.
    atexit.register(lambda: hs.shutdown())
    return hs
def confusion_matrix(self, metrics=None, thresholds=None):
    """
    Get the confusion matrix for the specified metrics and/or thresholds.

    When neither ``metrics`` nor ``thresholds`` is given, the matrix at the max-"f1" threshold is returned.

    :param metrics: A string (or list of strings) in {"min_per_class_accuracy", "absolute_mcc", "precision",
        "recall", "specificity", "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy"}. The membership
        check is case-insensitive.
    :param thresholds: A value (or list of values) between 0 and 1. A list passed here is not modified.
    :returns: a list of ConfusionMatrix objects (if there are more than one to return), or a single
        ConfusionMatrix (if there is only one). Matrices for the explicit ``thresholds`` come first, followed
        by one matrix per entry of ``metrics``.
    :raises ValueError: if any of the ``metrics`` is not in the set listed above.
    """
    allowed_metrics = ("min_per_class_accuracy", "absolute_mcc", "precision", "recall", "specificity",
                       "accuracy", "f0point5", "f2", "f1", "mean_per_class_accuracy")

    # make lists out of metrics and thresholds arguments
    if metrics is None and thresholds is None:
        metrics = ["f1"]

    if isinstance(metrics, list):
        metrics_list = metrics
    elif metrics is None:
        metrics_list = []
    else:
        metrics_list = [metrics]

    if isinstance(thresholds, list):
        # Copy: the metric-derived thresholds are appended below and must not leak into the caller's list.
        thresholds_list = list(thresholds)
    elif thresholds is None:
        thresholds_list = []
    else:
        thresholds_list = [thresholds]

    # error check the metrics_list and thresholds_list
    assert_is_type(thresholds_list, [numeric])
    assert_satisfies(thresholds_list, all(0 <= t <= 1 for t in thresholds_list))

    if not all(m.lower() in allowed_metrics for m in metrics_list):
        # Build the message from the same tuple that is checked, so the two cannot drift apart.
        raise ValueError("The only allowable metrics are " + ", ".join(allowed_metrics))

    # make one big list that combines the thresholds and metric-thresholds
    metrics_thresholds = [self.find_threshold_by_max_metric(m) for m in metrics_list]
    n_explicit = len(thresholds_list)
    thresholds_list.extend(metrics_thresholds)

    thresh2d = self._metric_json['thresholds_and_metric_scores']
    domain = self._metric_json['domain']
    cms = []
    for pos, t in enumerate(thresholds_list):
        idx = self.find_idx_by_threshold(t)
        row = thresh2d.cell_values[idx]
        # Column 0 is read as the threshold actually stored in the table; columns 11..14 are read as the
        # tns / fns / fps / tps counts.
        actual_threshold = float(row[0])
        tns, fns, fps, tps = row[11], row[12], row[13], row[14]
        # Decide by position, not by value: two metrics may share a threshold, and an explicit threshold may
        # coincide with a metric's one.
        if pos < n_explicit:
            table_header = "Confusion Matrix (Act/Pred) @ threshold = " + str(actual_threshold)
        else:
            m = metrics_list[pos - n_explicit]
            table_header = "Confusion Matrix (Act/Pred) for max " + m + " @ threshold = " + str(actual_threshold)
        cms.append(ConfusionMatrix(cm=[[tns, fps], [fns, tps]], domains=domain, table_header=table_header))

    if len(cms) == 1:
        return cms[0]
    return cms