Пример #1
0
 def jobs(self, job_key=None, timeoutSecs=10, **kwargs):
     """Query the '/3/Jobs' endpoint.

     'job_key' is sent as a request parameter.  The parameters and 'kwargs'
     are first handed to h2o_util.check_params_update_kwargs() (presumably
     validating/merging the extras -- confirm in h2o_util).

     Returns whatever __do_json_request() returns; 'timeoutSecs' is passed
     to it as the request timeout.
     """
     query = {'job_key': job_key}
     h2o_util.check_params_update_kwargs(query, kwargs, 'jobs', H2O.verbose)
     return self.__do_json_request('/3/Jobs', timeout=timeoutSecs, params=query)
Пример #2
0
 def jobs(self, job_key=None, timeoutSecs=10, **kwargs):
     """Query the '/3/Jobs' endpoint.

     'job_key' is sent as a request parameter.  The parameters and 'kwargs'
     are first handed to h2o_util.check_params_update_kwargs() (presumably
     validating/merging the extras -- confirm in h2o_util).

     Returns whatever __do_json_request() returns; 'timeoutSecs' is passed
     to it as the request timeout.
     """
     query = {
         'job_key': job_key,
     }
     h2o_util.check_params_update_kwargs(query, kwargs, 'jobs', H2O.verbose)

     response = self.__do_json_request('/3/Jobs', timeout=timeoutSecs, params=query)
     return response
Пример #3
0
    def columns(self, key, timeoutSecs=10, **kwargs):
        """Request the '/columns' resource of the frame named by 'key'.

        The request starts from row_offset=0 and row_count=100; those
        defaults and 'kwargs' go through
        h2o_util.check_params_update_kwargs() before the call (presumably so
        callers can override them -- confirm in h2o_util).

        Returns the result of __do_json_request().
        """
        query = {
            'row_offset': 0,
            'row_count': 100,
        }
        h2o_util.check_params_update_kwargs(query, kwargs, 'columns', H2O.verbose)

        endpoint = '/3/Frames/' + key + '/columns'
        return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #4
0
 def column(self, key, column, timeoutSecs=10, **kwargs):
     """Request one column of the frame 'key' from '3/Frames.json'.

     The request starts from offset=0 and len=100; those defaults and
     'kwargs' go through h2o_util.check_params_update_kwargs() before the
     call (presumably so callers can override them -- confirm in h2o_util).

     Returns the result of __do_json_request().
     """
     query = {'offset': 0, 'len': 100}
     h2o_util.check_params_update_kwargs(query, kwargs, 'column', H2O.verbose)

     endpoint = '3/Frames.json/' + key + '/columns/' + column
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #5
0
 def summary(self, key, column, timeoutSecs=10, **kwargs):
     """Request the '/summary' resource for 'column' of the frame 'key'.

     The request starts from row_offset=0 and row_count=100; those defaults
     and 'kwargs' go through h2o_util.check_params_update_kwargs() before
     the call (presumably so callers can override them -- confirm in
     h2o_util).

     Returns the result of __do_json_request().
     """
     query = {'row_offset': 0, 'row_count': 100}
     h2o_util.check_params_update_kwargs(query, kwargs, 'summary', H2O.verbose)

     endpoint = '/3/Frames/' + key + '/columns/' + column + '/summary'
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #6
0
    def parse(self, key, dest_key=None,
              timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
              noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
        """Parse the raw source 'key' into a frame (ParseSetup, then Parse).

        Steps:
          1. POST /3/ParseSetup with the quoted source key.
          2. POST /3/Parse with the settings taken from the ParseSetup
             result; 'dest_key', when truthy, replaces the destination frame
             suggested by ParseSetup.
          3. Unless 'noPoll' is set, poll the parse job and fetch the
             resulting frame.

        Returns:
          * noPoll=True: the result of self.jobs(job_key).
          * otherwise: the result of self.frames(<job dest name>), or None
            when poll_job() returns a false value.

        'timeoutSecs' is used for both requests and for the polling.
        retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are
        accepted but not used in this method.
        """
        if benchmarkLogging:
            cloudPerfH2O.get_log_save(initOnly=True)

        # Step 1: ParseSetup?source_frames=[keys]
        # TODO: multiple keys
        parse_setup_params = {
            'source_frames': '["' + key + '"]',  # NOTE: quote key names
        }
        setup_result = self.__do_json_request(jsonRequest="/3/ParseSetup", cmd='post', timeout=timeoutSecs, postData=parse_setup_params)
        H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

        # Step 2: Parse?source_frames=<keys list> plus the settings that
        # ParseSetup produced, e.g.
        #   source_frames=[nfs://.../prostate.csv]&destination_frame=prostate.hex
        #   &parse_type=CSV&separator=44&number_columns=9&check_header=0&...
        parse_params = {
            'source_frames': '["' + setup_result['source_frames'][0]['name'] + '"]',  # TODO: cons up the whole list
            'destination_frame': dest_key if dest_key else setup_result['destination_frame'],
            'parse_type': setup_result['parse_type'],
            'separator': setup_result['separator'],
            'single_quotes': setup_result['single_quotes'],
            'check_header': setup_result['check_header'],
            'number_columns': setup_result['number_columns'],
            'column_names': setup_result['column_names'],  # gets stringified inside __do_json_request()
            'column_types': setup_result['column_types'],  # gets stringified inside __do_json_request()
            'na_strings': setup_result['na_strings'],
            'chunk_size': setup_result['chunk_size'],
        }
        H2O.verboseprint("parse_params: " + repr(parse_params))
        h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=H2O.verbose)

        # NOTE(review): kwargs is also forwarded to __do_json_request() after
        # being run through check_params_update_kwargs() -- confirm intended.
        parse_result = self.__do_json_request(jsonRequest="/3/Parse", cmd='post', timeout=timeoutSecs, postData=parse_params, **kwargs)
        H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

        job_key = parse_result['job']['key']['name']

        # TODO: dislike having different shapes for noPoll and poll
        if noPoll:
            # Was 'this.jobs(...)': 'this' is not defined in Python, so the
            # noPoll path raised NameError.
            return self.jobs(job_key)

        # Step 3: wait for the job, then return the frame it produced.
        job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)

        if job_json:
            dest_key = job_json['jobs'][0]['dest']['name']
            return self.frames(dest_key)

        return None
Пример #7
0
    def parse(self, key, dest_key=None,
              timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
              noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
        """Run ParseSetup followed by Parse for the raw source 'key'.

        The source key is quoted and POSTed to /3/ParseSetup.  The settings
        in that result are then POSTed to /3/Parse; a truthy 'dest_key'
        replaces the destination frame suggested by ParseSetup.

        Returns:
          * noPoll=True: the result of self.jobs(job_key), without waiting.
          * otherwise: the result of self.frames(<job dest name>) once
            poll_job() returns a true value, else None.

        'timeoutSecs' is used for both requests and for the polling.
        retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are
        accepted but not used in this method.
        """
        if benchmarkLogging:
            cloudPerfH2O.get_log_save(initOnly=True)

        #
        # Call ParseSetup?source_frames=[keys] . . .
        #
        # TODO: multiple keys
        parse_setup_params = {
            'source_frames': '["' + key + '"]',  # NOTE: quote key names
        }
        setup_result = self.__do_json_request(jsonRequest="/3/ParseSetup", cmd='post', timeout=timeoutSecs, postData=parse_setup_params)
        H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

        #
        # . . . and then Parse?source_frames=<keys list> with the params
        # from the ParseSetup result.
        #
        parse_params = {
            'source_frames': '["' + setup_result['source_frames'][0]['name'] + '"]',  # TODO: cons up the whole list
            'destination_frame': dest_key if dest_key else setup_result['destination_frame'],
            'parse_type': setup_result['parse_type'],
            'separator': setup_result['separator'],
            'single_quotes': setup_result['single_quotes'],
            'check_header': setup_result['check_header'],
            'number_columns': setup_result['number_columns'],
            'column_names': setup_result['column_names'],  # gets stringified inside __do_json_request()
            'column_types': setup_result['column_types'],  # gets stringified inside __do_json_request()
            'na_strings': setup_result['na_strings'],
            'chunk_size': setup_result['chunk_size'],
        }
        H2O.verboseprint("parse_params: " + repr(parse_params))
        h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=H2O.verbose)

        # NOTE(review): kwargs is also forwarded to __do_json_request() after
        # being run through check_params_update_kwargs() -- confirm intended.
        parse_result = self.__do_json_request(jsonRequest="/3/Parse", cmd='post', timeout=timeoutSecs, postData=parse_params, **kwargs)
        H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

        job_key = parse_result['job']['key']['name']

        # TODO: dislike having different shapes for noPoll and poll
        if noPoll:
            # Fixed: the original referenced the undefined name 'this',
            # raising NameError whenever noPoll was requested.
            return self.jobs(job_key)

        job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)

        if job_json:
            dest_key = job_json['jobs'][0]['dest']['name']
            return self.frames(dest_key)

        return None
Пример #8
0
 def summary(self, key, column, timeoutSecs=10, **kwargs):
     """Request the '/summary' resource for 'column' of frame 'key'.

     Uses the '3/Frames.json' endpoint with offset=0 and len=100 as the
     starting parameters.  Those and 'kwargs' go through
     h2o_util.check_params_update_kwargs() first, with True passed as its
     fourth argument.

     Returns the result of __do_json_request().
     """
     query = {'offset': 0, 'len': 100}
     h2o_util.check_params_update_kwargs(query, kwargs, 'summary', True)

     endpoint = '3/Frames.json/' + key + '/columns/' + column + '/summary'
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #9
0
    def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
        """Query '3/ModelBuilders', optionally narrowed to one algorithm.

        With a truthy 'algo' the request goes to '3/ModelBuilders/<algo>';
        otherwise to '3/ModelBuilders'.  The (initially empty) parameters and
        'kwargs' go through h2o_util.check_params_update_kwargs() first.

        Returns the result of __do_json_request().
        """
        query = {}
        h2o_util.check_params_update_kwargs(query, kwargs, 'model_builders', H2O.verbose)

        endpoint = '3/ModelBuilders'
        if algo:
            endpoint = endpoint + '/' + algo
        return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #10
0
    def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
        """Query '2/ModelBuilders.json', optionally narrowed to one algorithm.

        With a truthy 'algo' the request goes to
        '2/ModelBuilders.json/<algo>'; otherwise to '2/ModelBuilders.json'.
        The (initially empty) parameters and 'kwargs' go through
        h2o_util.check_params_update_kwargs() first, with True passed as its
        fourth argument.

        Returns the result of __do_json_request().
        """
        query = {}
        h2o_util.check_params_update_kwargs(query, kwargs, 'model_builders', True)

        endpoint = '2/ModelBuilders.json'
        if algo:
            endpoint = endpoint + '/' + algo
        return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #11
0
    def models(self, api_version=3, key=None, timeoutSecs=20, **kwargs):
        """Query '<api_version>/Models', optionally for a single model key.

        With a truthy 'key' the request goes to '<api_version>/Models/<key>';
        otherwise to '<api_version>/Models'.  The request starts from
        find_compatible_frames=False; that default and 'kwargs' go through
        h2o_util.check_params_update_kwargs() first.

        Returns the result of __do_json_request().
        """
        query = {'find_compatible_frames': False}
        h2o_util.check_params_update_kwargs(query, kwargs, 'models', H2O.verbose)

        endpoint = str(api_version) + '/Models'
        if key:
            endpoint = endpoint + '/' + key
        return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #12
0
    def models(self, api_version=3, key=None, timeoutSecs=20, **kwargs):
        """Fetch all models, or one model when 'key' is truthy.

        The endpoint is '<api_version>/Models' with '/<key>' appended for a
        single model.  find_compatible_frames=False is the starting request
        parameter; it and 'kwargs' are passed through
        h2o_util.check_params_update_kwargs() before the request.

        Returns the result of __do_json_request().
        """
        request_params = {
            'find_compatible_frames': False,
        }
        h2o_util.check_params_update_kwargs(request_params, kwargs, 'models', H2O.verbose)

        if not key:
            path = str(api_version) + '/Models'
        else:
            path = str(api_version) + '/Models/' + key
        return self.__do_json_request(path, timeout=timeoutSecs, params=request_params)
Пример #13
0
    def models(self, key=None, timeoutSecs=10, **kwargs):
        """Query '3/Models.json', optionally for a single model key.

        With a truthy 'key' the request goes to '3/Models.json/<key>';
        otherwise to '3/Models.json'.  The request starts from
        find_compatible_frames=False; that default and 'kwargs' go through
        h2o_util.check_params_update_kwargs() first, with True passed as its
        fourth argument.

        Returns the result of __do_json_request().
        """
        query = {'find_compatible_frames': False}
        h2o_util.check_params_update_kwargs(query, kwargs, 'models', True)

        endpoint = '3/Models.json'
        if key:
            endpoint = endpoint + '/' + key
        return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #14
0
    def models(self, key=None, timeoutSecs=10, **kwargs):
        """Fetch all models, or one model when 'key' is truthy.

        The endpoint is '3/Models.json' with '/<key>' appended for a single
        model.  find_compatible_frames=False is the starting request
        parameter; it and 'kwargs' are passed through
        h2o_util.check_params_update_kwargs() (fourth argument True) before
        the request.

        Returns the result of __do_json_request().
        """
        request_params = {
            'find_compatible_frames': False,
        }
        h2o_util.check_params_update_kwargs(request_params, kwargs, 'models', True)

        if not key:
            path = '3/Models.json'
        else:
            path = '3/Models.json/' + key
        return self.__do_json_request(path, timeout=timeoutSecs, params=request_params)
Пример #15
0
 def frames(self, key=None, timeoutSecs=10, **kwargs):
     """Query '/3/Frames', optionally for a single frame key.

     With a truthy 'key' the request goes to '/3/Frames/<key>'; otherwise
     to '/3/Frames'.  The request starts from find_compatible_models=0,
     row_offset=0 and row_count=100; those defaults and 'kwargs' go through
     h2o_util.check_params_update_kwargs() first.

     Returns the result of __do_json_request().
     """
     query = {'find_compatible_models': 0, 'row_offset': 0, 'row_count': 100}
     h2o_util.check_params_update_kwargs(query, kwargs, 'frames', H2O.verbose)

     endpoint = '/3/Frames'
     if key:
         endpoint = endpoint + '/' + key
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #16
0
 def frames(self, key=None, timeoutSecs=10, **kwargs):
     """Fetch all frames, or one frame when 'key' is truthy.

     The endpoint is '/3/Frames' with '/<key>' appended for a single frame.
     Starting request parameters are find_compatible_models=0, row_offset=0
     and row_count=100; they and 'kwargs' are passed through
     h2o_util.check_params_update_kwargs() before the request.

     Returns the result of __do_json_request().
     """
     request_params = {
         'find_compatible_models': 0,
         'row_offset': 0,
         'row_count': 100,
     }
     h2o_util.check_params_update_kwargs(request_params, kwargs, 'frames', H2O.verbose)

     if not key:
         path = '/3/Frames'
     else:
         path = '/3/Frames/' + key
     return self.__do_json_request(path, timeout=timeoutSecs, params=request_params)
Пример #17
0
 def frames(self, key=None, timeoutSecs=10, **kwargs):
     """Query '3/Frames.json', optionally for a single frame key.

     With a truthy 'key' the request goes to '3/Frames.json/<key>';
     otherwise to '3/Frames.json'.  The request starts from
     find_compatible_models=0, offset=0 and len=100; those defaults and
     'kwargs' go through h2o_util.check_params_update_kwargs() first.

     Returns the result of __do_json_request().
     """
     # TODO: len and offset are not working yet
     query = {'find_compatible_models': 0, 'offset': 0, 'len': 100}
     h2o_util.check_params_update_kwargs(query, kwargs, 'frames', H2O.verbose)

     endpoint = '3/Frames.json'
     if key:
         endpoint = endpoint + '/' + key
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #18
0
 def frames(self, key=None, timeoutSecs=10, **kwargs):
     """Query '3/Frames.json', optionally for a single frame key.

     With a truthy 'key' the request goes to '3/Frames.json/<key>';
     otherwise to '3/Frames.json'.  The request starts from
     find_compatible_models=0, offset=0 and len=100; those defaults and
     'kwargs' go through h2o_util.check_params_update_kwargs() first, with
     True passed as its fourth argument.

     Returns the result of __do_json_request().
     """
     # TODO: len and offset are not working yet
     query = {'find_compatible_models': 0, 'offset': 0, 'len': 100}
     h2o_util.check_params_update_kwargs(query, kwargs, 'frames', True)

     endpoint = '3/Frames.json'
     if key:
         endpoint = endpoint + '/' + key
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #19
0
    def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
        """Query the ModelBuilders endpoint, optionally for one algorithm.

        Without 'algo' the request goes to '3/ModelBuilders'.  With a truthy
        'algo' it goes to '<version>/ModelBuilders/<algo>', where the version
        is 99 when the algorithm is listed in H2O.experimental_algos and 3
        otherwise.  The (initially empty) parameters and 'kwargs' go through
        h2o_util.check_params_update_kwargs() first.

        Returns the result of __do_json_request().
        """
        query = {}
        h2o_util.check_params_update_kwargs(query, kwargs, 'model_builders', H2O.verbose)

        if algo:
            rest_version = 99 if algo in H2O.experimental_algos else 3
            endpoint = str(rest_version) + '/ModelBuilders/' + algo
        else:
            endpoint = '3/ModelBuilders'
        return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #20
0
    def model_builders(self, algo=None, timeoutSecs=10, **kwargs):
        """Fetch model-builder metadata for all algorithms or for 'algo'.

        A truthy 'algo' selects '<version>/ModelBuilders/<algo>'; the version
        is 99 for members of H2O.experimental_algos and 3 for everything
        else.  A falsy 'algo' selects '3/ModelBuilders'.  The (initially
        empty) parameters and 'kwargs' are passed through
        h2o_util.check_params_update_kwargs() before the request.

        Returns the result of __do_json_request().
        """
        request_params = {}
        h2o_util.check_params_update_kwargs(request_params, kwargs, 'model_builders', H2O.verbose)

        if not algo:
            path = '3/ModelBuilders'
        else:
            version = 3
            if algo in H2O.experimental_algos:
                version = 99
            path = str(version) + '/ModelBuilders/' + algo
        return self.__do_json_request(path, timeout=timeoutSecs, params=request_params)
Пример #21
0
    def poll_job(self, job_key, timeoutSecs=10, retryDelaySecs=0.5, **kwargs):
        """Poll '2/Jobs.json/<job_key>' until the job ends or time runs out.

        The job is requested at least once.  After each request:
          * if the first job's status is 'DONE', 'CANCELLED' or 'FAILED',
            that response is returned;
          * else, if more than 'timeoutSecs' seconds have passed since
            polling began, None is returned;
          * else the method sleeps 'retryDelaySecs' seconds and retries.

        'timeoutSecs' is also passed as the timeout of each request.
        'kwargs' and the (initially empty) request parameters go through
        h2o_util.check_params_update_kwargs() before polling starts.
        """
        params_dict = {}
        h2o_util.check_params_update_kwargs(params_dict, kwargs, 'poll_job', True)

        finished_states = ('DONE', 'CANCELLED', 'FAILED')
        start_time = time.time()
        while True:
            H2O.verboseprint('Polling for job: ' + job_key + '. . .')
            result = self.__do_json_request('2/Jobs.json/' + job_key, timeout=timeoutSecs, params=params_dict)

            status = result['jobs'][0]['status']
            if status in finished_states:
                H2O.verboseprint('Job ' + status + ': ' + job_key + '.')
                return result

            if time.time() - start_time > timeoutSecs:
                # timeoutSecs is numeric: concatenating it directly raised
                # TypeError here instead of reporting the timeout and
                # returning None, so convert it first.
                H2O.verboseprint('Job: ' + job_key + ' timed out in: ' + str(timeoutSecs) + '.')
                return None

            time.sleep(retryDelaySecs)
Пример #22
0
    def poll_job(self, job_key, timeoutSecs=10, retryDelaySecs=0.5, **kwargs):
        """Repeatedly request '2/Jobs.json/<job_key>' until the job finishes.

        Returns the response in which the first job's status is 'DONE',
        'CANCELLED' or 'FAILED'.  Returns None when, after an unfinished
        response, more than 'timeoutSecs' seconds have elapsed since polling
        began.  Between attempts the method sleeps 'retryDelaySecs' seconds.

        'timeoutSecs' doubles as the timeout of every individual request.
        'kwargs' and the (initially empty) request parameters go through
        h2o_util.check_params_update_kwargs() before polling starts.
        """
        params_dict = {}
        h2o_util.check_params_update_kwargs(params_dict, kwargs, 'poll_job', True)

        start_time = time.time()
        while True:
            H2O.verboseprint('Polling for job: ' + job_key + '. . .')
            result = self.__do_json_request('2/Jobs.json/' + job_key, timeout=timeoutSecs, params=params_dict)

            # Read the status once instead of re-indexing the response.
            status = result['jobs'][0]['status']
            if status in ('DONE', 'CANCELLED', 'FAILED'):
                H2O.verboseprint('Job ' + status + ': ' + job_key + '.')
                return result

            if time.time() - start_time > timeoutSecs:
                # Fixed: str + number raised TypeError on the timeout path;
                # the number has to be converted before concatenation.
                H2O.verboseprint('Job: ' + job_key + ' timed out in: ' + str(timeoutSecs) + '.')
                return None

            time.sleep(retryDelaySecs)
Пример #23
0
    def poll_job(self, job_key, timeoutSecs=10, retryDelaySecs=0.5, **kwargs):
        """Poll '/3/Jobs/<job_key>' until the job ends or time runs out.

        Returns the response whose first job has status 'DONE', 'CANCELLED'
        or 'FAILED'.  If an unfinished response arrives more than
        'timeoutSecs' seconds after polling began, a message is printed and
        None is returned.  Otherwise the method sleeps 'retryDelaySecs'
        seconds and tries again.

        'timeoutSecs' is also the timeout of each individual request.
        """
        query = {}
        h2o_util.check_params_update_kwargs(query, kwargs, 'poll_job', H2O.verbose)

        finished_states = ('DONE', 'CANCELLED', 'FAILED')
        began = time.time()
        while True:
            H2O.verboseprint('Polling for job: ' + job_key + '. . .')
            response = self.__do_json_request('/3/Jobs/' + job_key, timeout=timeoutSecs, params=query)

            state = response['jobs'][0]['status']
            if state in finished_states:
                H2O.verboseprint('Job ' + state + ': ' + job_key + '.')
                return response

            elapsed = time.time() - began
            if elapsed > timeoutSecs:
                # Downstream checkers should tolerate None; the message is
                # printed in case that is overlooked.
                print('Job: ' + job_key + ' timed out in: ' + str(timeoutSecs) + '.')
                return None

            time.sleep(retryDelaySecs)
Пример #24
0
    def poll_job(self, job_key, timeoutSecs=10, retryDelaySecs=0.5, **kwargs):
        """Wait for the job 'job_key' by polling '/3/Jobs/<job_key>'.

        Each pass requests the job and inspects the first job's status:
          * 'DONE', 'CANCELLED' or 'FAILED' -> the response is returned;
          * otherwise, once more than 'timeoutSecs' seconds have passed since
            the first pass, a message is printed and None is returned;
          * otherwise the method sleeps 'retryDelaySecs' seconds and loops.

        'timeoutSecs' is also the timeout of each individual request.
        """
        request_params = {}
        h2o_util.check_params_update_kwargs(request_params, kwargs, 'poll_job', H2O.verbose)

        t0 = time.time()
        while True:
            H2O.verboseprint('Polling for job: ' + job_key + '. . .')
            reply = self.__do_json_request('/3/Jobs/' + job_key, timeout=timeoutSecs, params=request_params)

            job_status = reply['jobs'][0]['status']
            if job_status in ('DONE', 'CANCELLED', 'FAILED'):
                H2O.verboseprint('Job ' + job_status + ': ' + job_key + '.')
                return reply

            if time.time() - t0 > timeoutSecs:
                # Callers are expected to cope with None; print so that the
                # timeout is not missed.
                print('Job: ' + job_key + ' timed out in: ' + str(timeoutSecs) + '.')
                return None

            time.sleep(retryDelaySecs)
Пример #25
0
 def summary(self, key, column, timeoutSecs=10, **kwargs):
     """Request the '/summary' resource for 'column' of frame 'key'.

     Uses the '3/Frames.json' endpoint.  The (initially empty) parameters
     and 'kwargs' go through h2o_util.check_params_update_kwargs() first,
     with True passed as its fourth argument.

     Returns the result of __do_json_request().
     """
     query = {}
     h2o_util.check_params_update_kwargs(query, kwargs, 'summary', True)

     endpoint = '3/Frames.json/' + key + '/columns/' + column + '/summary'
     return self.__do_json_request(endpoint, timeout=timeoutSecs, params=query)
Пример #26
0
    def parse(self, key, key2=None,
              timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
              noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
        """Parse the raw source 'key' into a frame (ParseSetup, then Parse).

        Steps:
          1. Request ParseSetup.json with srcs=[key].
          2. Request Parse.json with the settings taken from the ParseSetup
             result; the column names are sent as one '[a, b, ...]' string.
          3. Unless 'noPoll' is set, poll the parse job and fetch the
             resulting frame.

        Returns:
          * noPoll=True: the result of self.jobs(job_key).
          * otherwise: the result of self.frames(<job dest name>), or None
            when poll_job() returns a false value.

        'timeoutSecs' is used for both requests and for the polling.  key2,
        retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are
        accepted but not used in this method.
        """
        if benchmarkLogging:
            cloudPerfH2O.get_log_save(initOnly=True)

        # Step 1: ParseSetup?srcs=[keys]
        # TODO: multiple keys
        parse_setup_params = {
            'srcs': "[" + key + "]",
        }
        setup_result = self.__do_json_request(jsonRequest="ParseSetup.json", timeout=timeoutSecs, params=parse_setup_params)
        H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

        # Step 2: Parse?srcs=<keys list> plus the settings that ParseSetup
        # produced, e.g.
        #   srcs=[nfs://.../prostate.csv]&hex=prostate.hex&pType=CSV&sep=44
        #   &ncols=9&checkHeader=0&singleQuotes=false&columnNames=[ID, ...]
        ascii_column_names = '[' + ', '.join(str(s) for s in setup_result['columnNames']) + ']'

        parse_params = {
            'srcs': "[" + setup_result['srcs'][0]['name'] + "]",  # TODO: cons up the whole list
            'hex': setup_result['hexName'],
            'pType': setup_result['pType'],
            'sep': setup_result['sep'],
            'ncols': setup_result['ncols'],
            'checkHeader': setup_result['checkHeader'],
            'singleQuotes': setup_result['singleQuotes'],
            'columnNames': ascii_column_names,
        }
        # Single-argument form prints the same text as the original
        # two-item print statement (which inserted a second space).
        print("parse_params:  " + str(parse_params))
        h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=True)

        # NOTE(review): kwargs is also forwarded to __do_json_request() after
        # being run through check_params_update_kwargs() -- confirm intended.
        parse_result = self.__do_json_request(jsonRequest="Parse.json", timeout=timeoutSecs, params=parse_params, **kwargs)
        H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

        job_key = parse_result['job']['name']

        # TODO: dislike having different shapes for noPoll and poll
        if noPoll:
            # Was 'this.jobs(...)': 'this' is not defined in Python, so the
            # noPoll path raised NameError.
            return self.jobs(job_key)

        # Step 3: wait for the job, then return the frame it produced.
        job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)

        if job_json:
            dest_key = job_json['jobs'][0]['dest']['name']
            return self.frames(dest_key)

        return None
Пример #27
0
    def parse(self, key, key2=None,
              timeoutSecs=300, retryDelaySecs=0.2, initialDelaySecs=None, pollTimeoutSecs=180,
              noise=None, benchmarkLogging=None, noPoll=False, **kwargs):
        """Run ParseSetup.json followed by Parse.json for the source 'key'.

        ParseSetup is requested with srcs=[key]; its result supplies the
        settings (hex name, parser type, separator, column count, header and
        quote flags, column names) sent to Parse.  The column names are
        flattened into a single '[a, b, ...]' string.

        Returns:
          * noPoll=True: the result of self.jobs(job_key), without waiting.
          * otherwise: the result of self.frames(<job dest name>) once
            poll_job() returns a true value, else None.

        'timeoutSecs' is used for both requests and for the polling.  key2,
        retryDelaySecs, initialDelaySecs, pollTimeoutSecs and noise are
        accepted but not used in this method.
        """
        if benchmarkLogging:
            cloudPerfH2O.get_log_save(initOnly=True)

        #
        # Call ParseSetup?srcs=[keys] . . .
        #
        # TODO: multiple keys
        parse_setup_params = {
            'srcs': "[" + key + "]",
        }
        setup_result = self.__do_json_request(jsonRequest="ParseSetup.json", timeout=timeoutSecs, params=parse_setup_params)
        H2O.verboseprint("ParseSetup result:", h2o_util.dump_json(setup_result))

        #
        # . . . and then Parse?srcs=<keys list> with the params from the
        # ParseSetup result.
        #
        column_name_strings = [str(s) for s in setup_result['columnNames']]
        ascii_column_names = '[' + ', '.join(column_name_strings) + ']'

        parse_params = {
            'srcs': "[" + setup_result['srcs'][0]['name'] + "]",  # TODO: cons up the whole list
            'hex': setup_result['hexName'],
            'pType': setup_result['pType'],
            'sep': setup_result['sep'],
            'ncols': setup_result['ncols'],
            'checkHeader': setup_result['checkHeader'],
            'singleQuotes': setup_result['singleQuotes'],
            'columnNames': ascii_column_names,
        }
        # Single-argument form prints the same text as the original
        # two-item print statement (which inserted a second space).
        print("parse_params:  " + str(parse_params))
        h2o_util.check_params_update_kwargs(parse_params, kwargs, 'parse', print_params=True)

        # NOTE(review): kwargs is also forwarded to __do_json_request() after
        # being run through check_params_update_kwargs() -- confirm intended.
        parse_result = self.__do_json_request(jsonRequest="Parse.json", timeout=timeoutSecs, params=parse_params, **kwargs)
        H2O.verboseprint("Parse result:", h2o_util.dump_json(parse_result))

        job_key = parse_result['job']['name']

        # TODO: dislike having different shapes for noPoll and poll
        if noPoll:
            # Fixed: the original referenced the undefined name 'this',
            # raising NameError whenever noPoll was requested.
            return self.jobs(job_key)

        job_json = self.poll_job(job_key, timeoutSecs=timeoutSecs)

        if job_json:
            dest_key = job_json['jobs'][0]['dest']['name']
            return self.frames(dest_key)

        return None