示例#1
0
文件: h2o_ray.py 项目: zxsted/h2o-3
def compute_model_metrics(self, model, frame, timeoutSecs=60, **kwargs):
    """
    Score a model on the h2o cluster on the given Frame and return only the model metrics. 
    """
    assert model is not None, '"model" parameter is null'
    assert frame is not None, '"frame" parameter is null'

    models = self.models(key=model, timeoutSecs=timeoutSecs)
    assert models is not None, "/Models REST call failed"
    assert (
        models["models"][0]["model_id"]["name"] == model
    ), "/Models/{0} returned Model {1} rather than Model {2}".format(model, models["models"][0]["key"]["name"], model)

    # TODO: test this assert, I don't think this is working. . .
    frames = self.frames(key=frame)
    assert frames is not None, "/Frames/{0} REST call failed".format(frame)

    print "frames:", dump_json(frames)
    # is the name not there?
    # assert frames['frames'][0]['model_id']['name'] == frame, "/Frames/{0} returned Frame {1} rather than Frame {2}".format(frame, models['models'][0]['key']['name'], frame)

    result = self.do_json_request(
        "/3/ModelMetrics.json/models/" + model + "/frames/" + frame, cmd="post", timeout=timeoutSecs
    )

    mm = result["model_metrics"][0]
    verboseprint("model metrics: " + repr(mm))
    h2o_sandbox.check_sandbox_for_errors()
    return mm
示例#2
0
def rapids_iseval(self, timeoutSecs=120, ignoreH2oError=False, **kwargs):
    """
    Ask h2o whether a Rapids AST key is an eval result (GET 3/Rapids.json/isEval).

    Raises if the response carries an 'exception' (unless ignoreH2oError),
    then checks the sandbox for errors and returns the raw response dict.
    """
    # FIX! assume both of these are strings for now, not lists
    ast_key = kwargs.get('ast_key')
    if ast_key is not None:
        assert isinstance(ast_key, basestring), "only string assumed? %s" % ast_key

    # currently runExec only does one or the other
    params_dict = {'ast_key': None}
    check_params_update_kwargs(params_dict, kwargs, 'rapids_iseval', True)

    # doesn't like 'put' here?
    # doesn't like empty key
    result = self.do_json_request(
        '3/Rapids.json/isEval', cmd='get', timeout=timeoutSecs, params=params_dict)
    verboseprint("rapids_iseval result:", dump_json(result))

    # FIX! maybe add something for ignoring conditionally?
    exception = result.get('exception')
    if exception and not ignoreH2oError:
        raise Exception('rapids with kwargs:\n%s\ngot exception:\n"%s"\n' %
                        (dump_json(kwargs), exception))

    h2o_sandbox.check_sandbox_for_errors()
    return result
示例#3
0
 def is_alive(self):
     """Return True iff the external H2O answers a get_cloud() request."""
     verboseprint("Doing is_alive check for ExternalH2O")
     # bare except is deliberate: any failure (connection refused, timeout,
     # bad response) simply means "not alive"
     try:
         self.get_cloud()
     except:
         return False
     else:
         return True
示例#4
0
文件: h2o_ray.py 项目: patchlog/h2o-3
def compute_model_metrics(self, model, frame, timeoutSecs=60, **kwargs):
    '''
    Score a model on the h2o cluster on the given Frame and return only the model metrics. 
    '''
    assert model is not None, '"model" parameter is null'
    assert frame is not None, '"frame" parameter is null'

    models = self.models(key=model, timeoutSecs=timeoutSecs)
    assert models is not None, "/Models REST call failed"
    assert models['models'][0]['model_id'][
        'name'] == model, "/Models/{0} returned Model {1} rather than Model {2}".format(
            model, models['models'][0]['key']['name'], model)

    # TODO: test this assert, I don't think this is working. . .
    frames = self.frames(key=frame)
    assert frames is not None, "/Frames/{0} REST call failed".format(frame)

    print "frames:", dump_json(frames)
    # is the name not there?
    # assert frames['frames'][0]['model_id']['name'] == frame, "/Frames/{0} returned Frame {1} rather than Frame {2}".format(frame, models['models'][0]['key']['name'], frame)

    result = self.do_json_request('/3/ModelMetrics.json/models/' + model +
                                  '/frames/' + frame,
                                  cmd='post',
                                  timeout=timeoutSecs)

    mm = result['model_metrics'][0]
    verboseprint("model metrics: " + repr(mm))
    h2o_sandbox.check_sandbox_for_errors()
    return mm
示例#5
0
def find_key(pattern=None):
    try:
        patternObj = re.compile(pattern)
    except:
        raise Exception("Need legal string pattern in find_key, not %s",
                        pattern)

    frames = h2o_nodes.nodes[0].frames()['frames']
    keyList = [f['key']['name'] for f in frames]
    print "find_key keyList:", keyList

    result = []
    for key in keyList:
        if patternObj.search(key):
            result.append(key)

    if not result:
        for key in keyList:
            # if python regex didn't find anything, maybe the pattern is unix-style file match
            if fnmatch.fnmatch(key, pattern):
                result.append(key)

    if len(result) == 0:
        verboseprint("Warning: No match for %s" % pattern)
        return None

    if len(result) > 1:
        verboseprint(
            "Warning: multiple imported keys match the key pattern %s, Using: %s"
            % (pattern, result[0]))

    return result[0]
示例#6
0
def validate_model_parameters(self, algo, training_frame, parameters, timeoutSecs=60, **kwargs):
    '''
    Check a dictionary of model builder parameters on the h2o cluster
    using the given algorithm and model parameters.

    algo -- model builder algorithm name; must appear in /ModelBuilders
    training_frame -- optional frame key; when given, it is verified via
                      /Frames and injected into 'parameters'
    parameters -- dict of builder parameters to validate (mutated:
                  'training_frame' is added when training_frame is not None)

    Returns the raw parameter-validation response json.
    '''
    assert algo is not None, '"algo" parameter is null'
    # Allow this now: assert training_frame is not None, '"training_frame" parameter is null'
    assert parameters is not None, '"parameters" parameter is null'

    model_builders = self.model_builders(timeoutSecs=timeoutSecs)
    assert model_builders is not None, "/ModelBuilders REST call failed"
    assert algo in model_builders['model_builders']
    # CLEANUP: removed unused local 'builder' (model_builders['model_builders'][algo]
    # was fetched but never used)

    # TODO: test this assert, I don't think this is working. . .
    if training_frame is not None:
        frames = self.frames(key=training_frame)
        assert frames is not None, "/Frames/{0} REST call failed".format(training_frame)

        key_name = frames['frames'][0]['key']['name']
        assert key_name == training_frame, \
            "/Frames/{0} returned Frame {1} rather than Frame {2}".format(training_frame, key_name, training_frame)

        parameters['training_frame'] = training_frame

    # TODO: add parameter existence checks
    # TODO: add parameter value validation

    # FIX! why ignoreH2oError here?
    result = self.do_json_request('/2/ModelBuilders.json/' + algo + "/parameters", cmd='post',
        timeout=timeoutSecs, postData=parameters, ignoreH2oError=True, noExtraErrorCheck=True)

    verboseprint("model parameters validation: " + repr(result))
    return result
示例#7
0
def browseJsonHistoryAsUrlLastMatch(matchme, swapme=None):
    if not h2o_args.browse_disable:
        # get rid of the ".json" from the last url used by the test framework.
        # if we hit len(), we point to 0, so stop
        len_history = len(h2o_nodes.json_url_history)
        i = -1
        while (len_history + i != 0
               and not re.search(matchme, h2o_nodes.json_url_history[i])):
            i = i - 1
        url = h2o_nodes.json_url_history[i]

        # chop out the .json to get a browser-able url (can look at json too)
        # Open URL in new window, raising the window if possible.
        # webbrowser.open_new_tab(json_url)
        # UPDATE: with the new API port, the browser stuff has .html
        # but we've not switched everything to new. So do it selectively

        if swapme is not None: url = re.sub(matchme, swapme, url)
        url = re.sub("ProgressPage", "ProgressPage.html", url)
        url = re.sub("Progress?!Page", "Progress.html", url)
        url = re.sub(".json", ".html", url)

        verboseprint("browseJsonHistoryAsUrlLastMatch:", url)
        print "browseJsonHistoryAsUrlLastMatch,  decoded:", urllib.unquote(url)
        webbrowser.open_new_tab(url)
示例#8
0
def compareToFirstGlm(self, key, glm, firstglm):
    # if isinstance(firstglm[key], list):
    # in case it's not a list allready (err is a list)
    verboseprint("compareToFirstGlm key:", key)
    verboseprint("compareToFirstGlm glm[key]:", glm[key])
    # key could be a list or not. if a list, don't want to create list of that list
    # so use extend on an empty list. covers all cases?
    if type(glm[key]) is list:
        kList = glm[key]
        firstkList = firstglm[key]
    elif type(glm[key]) is dict:
        raise Exception("compareToFirstGLm: Not expecting dict for " + key)
    else:
        kList = [glm[key]]
        firstkList = [firstglm[key]]
        print "kbn:", kList, firstkList

    for k, firstk in zip(kList, firstkList):
        # delta must be a positive number ?
        delta = .1 * abs(float(firstk))
        msg = "Too large a delta (" + str(
            delta) + ") comparing current and first for: " + key
        self.assertAlmostEqual(float(k), float(firstk), delta=delta, msg=msg)
        self.assertGreaterEqual(abs(float(k)), 0.0,
                                str(k) + " abs not >= 0.0 in current")
示例#9
0
def find_key(pattern=None):
    try:
        patternObj = re.compile(pattern)
    except:
        raise Exception("Need legal string pattern in find_key, not %s", pattern)

    frames = h2o_nodes.nodes[0].frames()['frames']
    keyList = [f['key']['name'] for f in frames] 
    print "find_key keyList:", keyList

    result = []
    for key in keyList:
        if patternObj.search(key):
            result.append(key)

    if not result:
        for key in keyList:
            # if python regex didn't find anything, maybe the pattern is unix-style file match
            if fnmatch.fnmatch(key, pattern):
                result.append(key)

    if len(result) == 0:
        verboseprint("Warning: No match for %s" % pattern)
        return None

    if len(result) > 1:
        verboseprint("Warning: multiple imported keys match the key pattern %s, Using: %s" % (pattern, result[0]))

    return result[0]
示例#10
0
    def tryThemAll(self, set, rows, enumsOnly=False):
        """
        Write and parse a synthetic dataset for every combination of eol
        style, token substitution and separator substitution; optionally run
        RF on each parse. Prints a '.' per combination as progress.
        """
        for eolCase in range(len(self.eolDict)):
            eol = self.eolDict[eolCase]
            # change tokens must be first
            # BUG FIX: branches were inverted -- enumsOnly=True selected the
            # full tokenChangeDict and enumsOnly=False the enums-only dict
            if enumsOnly:
                tcd = self.tokenChangeDictEnumsOnly
            else:
                tcd = self.tokenChangeDict

            for tokenCase in range(len(tcd)):
                newRows1 = self.changeTokens(rows, tokenCase, tcd)
                for sepCase in range(len(self.sepChangeDict)):
                    newRows2 = self.changeSep(newRows1, sepCase)
                    csvPathname = SYNDATASETS_DIR + '/parsetmp_' + \
                        str(set) + "_" + \
                        str(eolCase) + "_" + \
                        str(tokenCase) + "_" + \
                        str(sepCase) + \
                        '.data'
                    self.writeRows(csvPathname, newRows2, eol)
                    # single-quote tokens need the parser's singleQuotes mode
                    if "'" in tcd[tokenCase][0]:
                        singleQuotes = 1
                    else:
                        singleQuotes = 0
                    parseResult = h2i.import_parse(path=csvPathname, schema='local', singleQuotes=singleQuotes,
                        noPrint=not h2o_args.verbose, retryDelaySecs=0.1,
                        doSummary=DO_SUMMARY, intermediateResults=DO_INTERMEDIATE_RESULTS)

                    if DO_RF:
                        h2o_cmd.runRF(parseResult=parseResult, trees=1,
                            timeoutSecs=10, retryDelaySecs=0.1, noPrint=True, print_params=True)
                    verboseprint("Set", set)
                    h2o.check_sandbox_for_errors()
                    sys.stdout.write('.')
                    sys.stdout.flush()
示例#11
0
    def test_simple2(self):
        """Import a small dataset, fetch its frame metadata, then parse it."""
        # h2o-dev doesn't take ../.. type paths? make find_file return absolute path
        a_node = h2o.nodes[0]

        # import_result = a_node.import_files(path=find_file("smalldata/logreg/prostate.csv"))
        import_result = a_node.import_files(path=find_file("smalldata/poker/poker-hand-testing.data"))
        # print dump_json(import_result)

        k = import_result['keys'][0]
        # frames_result = a_node.frames(key=k[0], len=5)

        frames_result = a_node.frames(key=k)

        frame = frames_result['frames'][0]
        byteSize = frame['byteSize']
        rows = frame['rows']
        columns = frame['columns']
        # smoke-test: just touch the per-column fields to make sure they exist
        # in the response (values are otherwise unused)
        for c in columns:
            label = c['label']
            missing = c['missing']
            stype = c['type']
            zeros = c['zeros']
            domain = c['domain']

        # print dump_json(frame)

        # how do you parse multiple files
        parse_result = a_node.parse(key=k)

        frame = parse_result['frames'][0]
        hex_key = frame['key']['name']

        verboseprint(hex_key, ":", dump_json(parse_result))
示例#12
0
def simpleCheckGLMGrid(self,
                       glmGridResult,
                       colX=None,
                       allowFailWarning=False,
                       **kwargs):
    # "grid": {
    #    "destination_keys": [
    #        "GLMGridResults__8222a49156af52532a34fb3ce4304308_0",
    #        "GLMGridResults__8222a49156af52532a34fb3ce4304308_1",
    #        "GLMGridResults__8222a49156af52532a34fb3ce4304308_2"
    #   ]
    # },
    destination_key = glmGridResult['grid']['destination_keys'][0]
    inspectGG = h2o_nodes.nodes[0].glm_view(destination_key)
    models = inspectGG['glm_model']['submodels']
    verboseprint("GLMGrid inspect GLMGrid model 0(best):",
                 dump_json(models[0]))
    g = simpleCheckGLM(self,
                       inspectGG,
                       colX,
                       allowFailWarning=allowFailWarning,
                       **kwargs)
    # just to get some save_model testing
    for i, m in enumerate(glmGridResult['grid']['destination_keys']):
        print "Saving model", m, "to model" + str(i)
        h2o_nodes.nodes[0].save_model(model=m, path='model' + str(i), force=1)

    return g
示例#13
0
def rapids(self, timeoutSecs=120, ignoreH2oError=False, **kwargs):
    """
    POST a Rapids expression ('ast') or function definitions ('funs') to
    Rapids.json and return the response. Raises if the response carries an
    'exception' (unless ignoreH2oError), then checks the sandbox for errors.
    """
    # FIX! assume both of these are strings for now, not lists
    if 'ast' in kwargs and kwargs['ast'] is not None:
        assert isinstance(kwargs['ast'], basestring), "only string assumed? %s" % kwargs['ast']
    if 'funs' in kwargs and kwargs['funs'] is not None:
        assert isinstance(kwargs['funs'], basestring), "only string assumed? %s" % kwargs['funs']

    # currently runExec only does one or the other
    params_dict = {
        'ast': None,
        'funs': None,
    }

    check_params_update_kwargs(params_dict, kwargs, 'rapids', True)
    # CLEANUP: removed dead 'if 1==1' conditional; only the POST branch was reachable
    result = self.do_json_request('Rapids.json', cmd='post', timeout=timeoutSecs, postData=params_dict)

    verboseprint("rapids result:", dump_json(result))

    # FIX! maybe add something for ignoring conditionally?
    if 'exception' in result and result['exception'] and not ignoreH2oError:
        exception = result['exception']
        raise Exception('rapids with kwargs:\n%s\ngot exception:\n"%s"\n' % (dump_json(kwargs), exception))

    h2o_sandbox.check_sandbox_for_errors()
    return result
示例#14
0
def split_frame(self, timeoutSecs=120, noPoll=False, **kwargs):
    params_dict = {
        'dataset': None,
        'ratios': None,
        'destKeys': None,  # ['bigger', 'smaller']
    }
    check_params_update_kwargs(params_dict,
                               kwargs,
                               'split_frame',
                               print_params=True)
    firstResult = self.do_json_request('3/SplitFrame.json',
                                       cmd='post',
                                       timeout=timeoutSecs,
                                       params=params_dict)
    print "firstResult:", dump_json(firstResult)
    # FIX! what is ['dest']['name'] ..It's not there at the beginning?
    job_key = firstResult['key']['name']

    if noPoll:
        h2o_sandbox.check_sandbox_for_errors()
        return firstResult

    # is it polllable while it's in the CREATED state? msec looks wrong. start_time is 0
    time.sleep(2)
    result = self.poll_job(job_key)
    verboseprint("split_frame result:", dump_json(result))
    return result
示例#15
0
文件: h2o_ray.py 项目: patchlog/h2o-3
def models(self, key=None, timeoutSecs=10, **kwargs):
    '''
    Return all of the models in the h2o cluster, or a single model given its key.
    The models are contained in a list called "models" at the top level of the
    result.  Currently the list is unordered.
    TODO:
    When find_compatible_frames is implemented then the top level
    dict will also contain a "frames" list.
    '''
    params_dict = {'find_compatible_frames': False}
    h2o_methods.check_params_update_kwargs(params_dict, kwargs, 'models', True)

    # one endpoint for "all models", one for a single model by key
    # result = self.do_json_request('3/Models.json', timeout=timeoutSecs, params=params_dict)
    # print "for ray:", dump_json(result)
    url = '3/Models.json/' + key if key else '3/Models.json'
    result = self.do_json_request(url, timeout=timeoutSecs, params=params_dict)

    verboseprint("models result:", dump_json(result))
    h2o_sandbox.check_sandbox_for_errors()
    return result
示例#16
0
def validate_model_parameters(self, algo, training_frame, parameters, timeoutSecs=60, **kwargs):
    '''
    Check a dictionary of model builder parameters on the h2o cluster
    using the given algorithm and model parameters.

    algo -- model builder algorithm name; must appear in /ModelBuilders
    training_frame -- optional frame key; when given, verified via /Frames
                      and injected into 'parameters'
    parameters -- dict of builder parameters to validate (mutated:
                  'training_frame' is added when training_frame is not None)

    Returns the raw parameter-validation response json.
    '''
    assert algo is not None, '"algo" parameter is null'
    # Allow this now: assert training_frame is not None, '"training_frame" parameter is null'
    assert parameters is not None, '"parameters" parameter is null'

    model_builders = self.model_builders(timeoutSecs=timeoutSecs)
    assert model_builders is not None, "/ModelBuilders REST call failed"
    assert algo in model_builders['model_builders']
    # CLEANUP: removed unused local 'builder' (model_builders['model_builders'][algo]
    # was fetched but never used)

    # TODO: test this assert, I don't think this is working. . .
    if training_frame is not None:
        frames = self.frames(key=training_frame)
        assert frames is not None, "/Frames/{0} REST call failed".format(training_frame)

        key_name = frames['frames'][0]['key']['name']
        assert key_name == training_frame, \
            "/Frames/{0} returned Frame {1} rather than Frame {2}".format(training_frame, key_name, training_frame)

        parameters['training_frame'] = training_frame

    # TODO: add parameter existence checks
    # TODO: add parameter value validation

    # FIX! why ignoreH2oError here?
    result = self.do_json_request('/2/ModelBuilders.json/' + algo + "/parameters", cmd='post',
        timeout=timeoutSecs, postData=parameters, ignoreH2oError=True, noExtraErrorCheck=True)

    verboseprint("model parameters validation: " + repr(result))
    return result
示例#17
0
def get_cloud(self, noExtraErrorCheck=False, timeoutSecs=10):
    """
    GET 3/Cloud.json and return the full response dict, verboseprint'ing a
    short summary (node_id, cloud_size, consensus, locked, version).
    """
    # hardwire it to allow a 60 second timeout
    a = self.do_json_request('3/Cloud.json',
                             noExtraErrorCheck=noExtraErrorCheck,
                             timeout=timeoutSecs)
    # verboseprint(a)

    version = a['version']
    # local builds have (unknown)    if not version.startswith('0'):
    # local builds have (unknown)        raise Exception("h2o version at node[0] doesn't look like h2o-dev version. (start with 0) %s" % version)

    consensus = a['consensus']
    locked = a['locked']
    cloud_size = a['cloud_size']
    # CLEANUP: removed unused local 'cloud_name' (was read from the response
    # but never used anywhere in this function)
    node_id = self.node_id
    verboseprint('%s%s %s%s %s%s %s%s %s%s' % (
        "\tnode_id: ",
        node_id,
        "\tcloud_size: ",
        cloud_size,
        "\tconsensus: ",
        consensus,
        "\tlocked: ",
        locked,
        "\tversion: ",
        version,
    ))
    return a
示例#18
0
    def test_simple2(self):
        """Import prostate.csv, check frame metadata, parse, and verify the
        parsed row/column counts via Inspect."""
        # h2o-dev doesn't take ../.. type paths? make find_file return absolute path
        # csvPathname = find_file("bigdata/laptop/poker-hand-testing.data")
        csvPathname = find_file("smalldata/logreg/prostate.csv")
        import_result = h2o.n0.import_files(path=csvPathname)
        # print dump_json(import_result)

        k = import_result['keys'][0]
        frames_result = h2o.n0.frames(key=k)

        frame = frames_result['frames'][0]
        rows = frame['rows']
        columns = frame['columns']
        # smoke-test: touch the per-column fields to make sure they exist in
        # the response (values are otherwise unused)
        for c in columns:
            label = c['label']
            missing = c['missing_count']
            stype = c['type']
            domain = c['domain']

        # print dump_json(frame)

        # let's see what ray's util does
        frames = h2o.n0.frames()['frames']
        frames_dict = h2o_util.list_to_dict(frames, 'key/name')
        # print "frames:", dump_json(frames)
        # print "frames_dict:", dump_json(frames_dict)
        for k,v in frames_dict.items():
            print "frames_dict key:", k

        # interesting. we can do dictionary comprehensions
        # { k:v for k,v in my_dict.items() if 'Peter' in k }

        # how do you parse multiple files
        parse_result = h2o.n0.parse(key=k, intermediateResults=DO_INTERMEDIATE_RESULTS)

        frame = parse_result['frames'][0]
        hex_key = frame['key']['name']

        # expected shape of prostate.csv (data rows only; header excluded)
        colCount = 9
        rowCount = 380
        # colCount = 11
        # rowCount = 1000000
        start = time.time()
        inspect = h2o_cmd.runInspect(None, hex_key)
        print "Inspect:", hex_key, "took", time.time() - start, "seconds"
        numCols = len(inspect['frames'][0]['columns'])
        numRows = inspect['frames'][0]['rows']
        print "\n" + csvPathname, \
            "    rows:", "{:,}".format(numRows), \
            "    len(columns):", "{:,}".format(numCols)

        # should match # of cols in header or ??
        self.assertEqual(numCols, colCount,
            "parse created result with the wrong number of cols %s %s" % (numCols, colCount))
        self.assertEqual(numRows, rowCount,
            "parse created result with the wrong number of rows (header shouldn't count) %s %s" % \
            (numRows, rowCount))

        verboseprint(hex_key, ":", dump_json(parse_result))
示例#19
0
 def is_alive(self):
     """Return True iff the remote H2O's channel is open and it answers get_cloud()."""
     verboseprint("Doing is_alive check for RemoteH2O")
     # a closed or exited ssh channel means the remote process is gone
     if self.channel.closed:
         return False
     if self.channel.exit_status_ready():
         return False
     # bare except is deliberate: any failure simply means "not alive"
     try:
         self.get_cloud(noExtraErrorCheck=True)
     except:
         return False
     else:
         return True
示例#20
0
def file_append(infile, outfile):
    """Append the contents of 'infile' onto the end of 'outfile'."""
    verboseprint("\nAppend'ing", infile, "to", outfile)
    start = time.time()
    # BUG FIX: use 'with' so both handles are closed even if the read/write
    # raises (the old explicit close() calls leaked on error)
    with open(infile, 'rb') as in_file:
        with open(outfile, 'a') as out_file:
            out_file.write(in_file.read())
    verboseprint("\nAppend took", (time.time() - start), "secs")
示例#21
0
文件: h2o_util.py 项目: AI-Cdrone/h2o
def file_append(infile, outfile):
    """Append the contents of 'infile' onto the end of 'outfile'."""
    verboseprint("\nAppend'ing", infile, "to", outfile)
    start = time.time()
    # BUG FIX: use 'with' so both handles are closed even if the read/write
    # raises (the old explicit close() calls leaked on error)
    with open(infile, 'rb') as in_file:
        with open(outfile, 'a') as out_file:
            out_file.write(in_file.read())
    verboseprint("\nAppend took",  (time.time() - start), "secs")
示例#22
0
文件: h2o_ray.py 项目: zxsted/h2o-3
def import_files(self, path, timeoutSecs=180):
    """ 
    Import a file or files into h2o.  The 'file' parameter accepts a directory or a single file.
    192.168.0.37:54323/ImportFiles.html?file=%2Fhome%2F0xdiag%2Fdatasets
    """
    result = self.do_json_request(
        "3/ImportFiles.json", timeout=timeoutSecs, params={"path": path})
    verboseprint("\nimport_files result:", dump_json(result))
    h2o_sandbox.check_sandbox_for_errors()
    return result
示例#23
0
def tear_down_cloud(nodeList=None, sandboxIgnoreErrors=False, force=False):
    """
    Shut down the h2o cloud (default: the global h2o_nodes.nodes list).

    Optionally sleeps with a browser open for debug first; deletes keys at
    all nodes when configured; sends shutdown_all and terminates the node
    processes (skipped when reusing an existing cloud, unless force=True);
    finally checks the sandbox for errors and clears the node lists.
    """
    if h2o_args.sleep_at_tear_down:
        print "Opening browser to cloud, and sleeping for 3600 secs, before cloud teardown (for debug)"
        import h2o_browse as h2b

        h2b.browseTheCloud()
        sleep(3600)

    if not nodeList:
        nodeList = h2o_nodes.nodes

    # this could fail too. Should this be set by -uc/--usecloud? or command line argument
    if nodeList and nodeList[0].delete_keys_at_teardown:
        start = time.time()
        h2i.delete_keys_at_all_nodes(timeoutSecs=300)
        elapsed = time.time() - start
        print "delete_keys_at_all_nodes(): took", elapsed, "secs"

    # could the nodeList still be empty in some exception cases? Assume not for now

    # FIX! don't send shutdown if we're using an existing cloud
    # also, copy the "delete keys at teardown from testdir_release
    # Assume there's a last "test" that's run to shutdown the cloud

    # don't tear down with -ccj either
    # FIX! what about usecloud or cloud_cloud_json params from build_cloud time?
    if force or not (h2o_args.usecloud or h2o_args.clone_cloud_json):
        try:
            # update: send a shutdown to all nodes.
            # h2o maybe doesn't progagate well if sent to one node
            # the api watchdog shouldn't complain about this?
            # just send one?

            # for n in nodeList:
            #     n.shutdown_all()
            h2o_nodes.nodes[0].shutdown_all()
        except:
            # best-effort: shutdown failures are expected during teardown
            pass

        # ah subtle. we might get excepts in issuing the shutdown, don't abort out
        # of trying the process kills if we get any shutdown exception (remember we go to all nodes)
        # so we might? nodes are shutting down?
        # FIX! should we wait a bit for a clean shutdown, before we process kill?
        # It can take more than 1 sec though.
        try:
            time.sleep(2)
            for n in nodeList:
                n.terminate()
                verboseprint("tear_down_cloud n:", n)
        except:
            # best-effort: a node may already be gone when we try to terminate it
            pass

    check_sandbox_for_errors(sandboxIgnoreErrors=sandboxIgnoreErrors, python_test_name=h2o_args.python_test_name)
    # get rid of all those pesky line marker files. Unneeded now
    clean_sandbox_doneToLine()
    nodeList[:] = []
    h2o_nodes.nodes = []
示例#24
0
def checkScalarResult(resultExec, resultKey, allowEmptyResult=False, nanOkay=False):
    # make the common problems easier to debug
    verboseprint("checkScalarResult resultExec:", dump_json(resultExec))

    if 'funstr' not in resultExec:
        emsg = "checkScalarResult: 'funstr' missing"
    if 'result' not in resultExec:
        emsg = "checkScalarResult: 'result' missing"
    if 'scalar' not in resultExec:
        emsg = "checkScalarResult: 'scalar' missing"
    if 'num_cols' not in resultExec:
        emsg = "checkScalarResult: 'num_cols' missing"
    if 'num_rows' not in resultExec:
        emsg = "checkScalarResult: 'num_rows' missing"
    elif 'cols' not in resultExec:
        emsg = "checkScalarResult: 'cols' missing"
    else:
        emsg = None
        num_cols = resultExec["num_cols"]
        num_rows = resultExec["num_rows"]
        cols = resultExec["cols"]
        # print "cols:", dump_json(cols)

    if emsg:
        print "\nKey: '" + str(resultKey) + "' resultExec:\n", dump_json(resultExec)
        sys.stdout.flush()
        raise Exception("exec result (resultExec) missing what we expected. Look at json above. " + emsg)

    if (cols and (not num_rows or num_rows==0) ) and not allowEmptyResult:
        print "resultExec[0]:", dump_json(resultExec)
        raise Exception ("checkScalarResult says 'cols' exist in exec json response,"+\
            " but num_rows: %s is 0 or None. Is that an expected 'empty' key state?" % num_rows+\
            " Use 'allowEmptyResult if so.")

    # Cycle thru rows and extract all the meta-data into a dict?   
    # assume "0" and "row" keys exist for each list entry in rows
    # FIX! the key for the value can be 0 or 1 or ?? (apparently col?) Should change H2O here

    # cols may not exist..if the result was just scalar?
    if not cols:
        # just return the scalar result then
        scalar = resultExec['scalar']
        if scalar is None:
            raise Exception("both cols and scalar are null: %s %s" % (cols, scalar))
        checkForBadFP(scalar, json=resultExec, nanOkay=nanOkay)
        return scalar

    metaDict = cols[0]
    for key,value in metaDict.items():
        print "Inspect metaDict:", key, value
            
    min_value = metaDict['min']
    stype = metaDict['type']
    # if it's an enum col, it's okay for min to be NaN ..
    checkForBadFP(min_value, json=metaDict, nanOkay=nanOkay or stype=='Enum')
    return min_value
示例#25
0
def checkScalarResult(resultExec, resultKey, allowEmptyResult=False, nanOkay=False):
    # make the common problems easier to debug
    verboseprint("checkScalarResult resultExec:", dump_json(resultExec))

    if 'funstr' not in resultExec:
        emsg = "checkScalarResult: 'funstr' missing"
    if 'result' not in resultExec:
        emsg = "checkScalarResult: 'result' missing"
    if 'scalar' not in resultExec:
        emsg = "checkScalarResult: 'scalar' missing"
    if 'num_cols' not in resultExec:
        emsg = "checkScalarResult: 'num_cols' missing"
    if 'num_rows' not in resultExec:
        emsg = "checkScalarResult: 'num_rows' missing"
    elif 'cols' not in resultExec:
        emsg = "checkScalarResult: 'cols' missing"
    else:
        emsg = None
        num_cols = resultExec["num_cols"]
        num_rows = resultExec["num_rows"]
        cols = resultExec["cols"]
        # print "cols:", dump_json(cols)

    if emsg:
        print "\nKey: '" + str(resultKey) + "' resultExec:\n", dump_json(resultExec)
        sys.stdout.flush()
        raise Exception("exec result (resultExec) missing what we expected. Look at json above. " + emsg)

    if (cols and (not num_rows or num_rows==0) ) and not allowEmptyResult:
        print "resultExec[0]:", dump_json(resultExec)
        raise Exception ("checkScalarResult says 'cols' exist in exec json response,"+\
            " but num_rows: %s is 0 or None. Is that an expected 'empty' key state?" % num_rows+\
            " Use 'allowEmptyResult if so.")

    # Cycle thru rows and extract all the meta-data into a dict?   
    # assume "0" and "row" keys exist for each list entry in rows
    # FIX! the key for the value can be 0 or 1 or ?? (apparently col?) Should change H2O here

    # cols may not exist..if the result was just scalar?
    if not cols:
        # just return the scalar result then
        scalar = resultExec['scalar']
        if scalar is None:
            raise Exception("both cols and scalar are null: %s %s" % (cols, scalar))
        checkForBadFP(scalar, json=resultExec, nanOkay=nanOkay)
        return scalar

    metaDict = cols[0]
    for key,value in metaDict.items():
        print "Inspect metaDict:", key, value
            
    min_value = metaDict['min']
    stype = metaDict['type']
    # if it's an enum col, it's okay for min to be NaN ..
    checkForBadFP(min_value, json=metaDict, nanOkay=nanOkay or stype=='Enum')
    return min_value
示例#26
0
def typeahead(self, timeoutSecs=10, **kwargs):
    """Hit 3/Typeahead.json/files (kwargs: src, limit) and return the response."""
    params_dict = {'src': None, 'limit': None}
    check_params_update_kwargs(params_dict, kwargs, 'typeahead', print_params=True)
    # odd ...needs /files
    result = self.do_json_request('3/Typeahead.json/files', params=params_dict, timeout=timeoutSecs)
    verboseprint("\ntypeahead result:", dump_json(result))
    return result
示例#27
0
def runStoreView(node=None, timeoutSecs=30, noPrint=None, **kwargs):
    if not node: node = h2o_nodes.nodes[0]
    storeView = node.store_view(timeoutSecs, **kwargs)
    if not noPrint:
        for s in storeView['keys']:
            print "StoreView: key:", s['key']
            if 'rows' in s: 
                verboseprint("StoreView: rows:", s['rows'], "value_size_bytes:", s['value_size_bytes'])
    print node, 'storeView has', len(storeView['keys']), 'keys'
    return storeView
示例#28
0
def typeahead(self, timeoutSecs=10, **kwargs):
    """Query path completions via the Typeahead REST endpoint."""
    # src/limit are the only legal request parameters
    params_dict = dict(src=None, limit=None)
    check_params_update_kwargs(params_dict, kwargs, 'typeahead', print_params=True)
    # odd ...needs /files
    response = self.do_json_request('3/Typeahead.json/files', params=params_dict, timeout=timeoutSecs)
    verboseprint("\ntypeahead result:", dump_json(response))
    return response
示例#29
0
def tear_down_cloud(nodeList=None, sandboxIgnoreErrors=False, force=False):
    """Tear down the h2o cloud: optionally delete keys, send Shutdown to the
    cluster, kill the local node processes, scan the sandbox logs for errors,
    and finally clear both nodeList and h2o_nodes.nodes.

    force=True performs the shutdown/terminate even when -uc/--usecloud or a
    cloned-cloud json was supplied (normally an existing cloud is left alone).
    """
    if h2o_args.sleep_at_tear_down:
        print "Opening browser to cloud, and sleeping for 3600 secs, before cloud teardown (for debug)"
        import h2o_browse as h2b
        h2b.browseTheCloud()
        sleep(3600)

    if not nodeList: nodeList = h2o_nodes.nodes

    # this could fail too. Should this be set by -uc/--usecloud? or command line argument
    if nodeList and nodeList[0].delete_keys_at_teardown:
        start = time.time()
        h2i.delete_keys_at_all_nodes(timeoutSecs=300)
        elapsed = time.time() - start
        print "delete_keys_at_all_nodes(): took", elapsed, "secs"

    # could the nodeList still be empty in some exception cases? Assume not for now

    # FIX! don't send shutdown if we're using an existing cloud
    # also, copy the "delete keys at teardown from testdir_release
    # Assume there's a last "test" that's run to shutdown the cloud

    # don't tear down with -ccj either
    # FIX! what about usecloud or cloud_cloud_json params from build_cloud time?
    if force or not (h2o_args.usecloud or h2o_args.clone_cloud_json):
        try:
            # update: send a shutdown to all nodes. 
            # h2o maybe doesn't progagate well if sent to one node
            # the api watchdog shouldn't complain about this?
            # just send one?

            # for n in nodeList:
            #     n.shutdown_all()
            h2o_nodes.nodes[0].shutdown_all()
        except:
            # best-effort: the cloud may already be dying; process kill below
            pass

        # ah subtle. we might get excepts in issuing the shutdown, don't abort out
        # of trying the process kills if we get any shutdown exception (remember we go to all nodes)
        # so we might? nodes are shutting down?
        # FIX! should we wait a bit for a clean shutdown, before we process kill?
        # It can take more than 1 sec though.
        try:
            time.sleep(2)
            for n in nodeList:
                n.terminate()
                verboseprint("tear_down_cloud n:", n)
        except:
            # best-effort: a node process may already be gone
            pass

    check_sandbox_for_errors(sandboxIgnoreErrors=sandboxIgnoreErrors, python_test_name=h2o_args.python_test_name)
    # get rid of all those pesky line marker files. Unneeded now
    clean_sandbox_doneToLine()
    # clear the caller's list in place (nodeList may alias h2o_nodes.nodes)
    nodeList[:] = []
    h2o_nodes.nodes = []
示例#30
0
    def upload_file(self, f, progress=None):
        # FIX! we won't find it here if it's hdfs://172.16.2.151/ file
        f = find_file(f)
        if f not in self.uploaded:
            start = time.time()
            import md5

            m = md5.new()
            m.update(open(f).read())
            m.update(getpass.getuser())
            dest = '/tmp/' + m.hexdigest() + "-" + os.path.basename(f)

            # sigh. we rm/create sandbox in build_cloud now
            # (because nosetests doesn't exec h2o_main and we
            # don't want to code "clean_sandbox()" in all the tests.
            # So: we don't have a sandbox here, or if we do, we're going to delete it.
            # Just don't log anything until build_cloud()? that should be okay?
            # we were just logging this upload message..not needed.
            # log('Uploading to %s: %s -> %s' % (self.http_addr, f, dest))
            sftp = self.ssh.open_sftp()
            # check if file exists on remote side
            # does paramiko have issues with big files? (>1GB, or 650MB?). maybe we don't care.
            # This would arise (as mentioned in the source, line no 667,
            # http://www.lag.net/paramiko/docs/paramiko.sftp_client-pysrc.html) when there is
            # any error reading the packet or when there is EOFError

            # but I'm getting sftp close here randomly at sm.
            # http://stackoverflow.com/questions/22708942/python-paramiko-module-error-with-callback
            # http://stackoverflow.com/questions/15010540/paramiko-sftp-server-connection-dropped
            # http://stackoverflow.com/questions/12322210/handling-paramiko-sshexception-server-connection-dropped
            try:
                # note we don't do a md5 compare. so if a corrupted file was uploaded we won't re-upload
                # until we do another build.
                sftp.stat(dest)
                print "{0} Skipping upload of file {1}. File {2} exists on remote side!".format(
                    self, f, dest)
            except IOError, e:
                # if self.channel.closed or self.channel.exit_status_ready():
                #     raise Exception("something bad happened to our %s being used for sftp. keepalive? %s %s" % \
                #         (self, self.channel.closed, self.channel.exit_status_ready()))

                if e.errno == errno.ENOENT:  # no such file or directory
                    verboseprint("{0} uploading file {1}".format(self, f))
                    sftp.put(f, dest, callback=progress)
                    # if you want to track upload times
                    ### print "\n{0:.3f} seconds".format(time.time() - start)
                elif e.errno == errno.EEXIST:  # File Exists
                    pass
                else:
                    print "Got unexpected errno: %s on paramiko sftp." % e.errno
                    print "Lookup here: https://docs.python.org/2/library/errno.html"
                    # throw the exception again, if not what we expected
                    exc_info = sys.exc_info()
                    raise exc_info[1], None, exc_info[2]
            finally:
示例#31
0
def jobs_admin(self, timeoutSecs=120, **kwargs):
    """List jobs known to the cluster via the v3 Jobs endpoint.

    All kwargs are passed straight through as request params.
    """
    query = dict(kwargs)
    verboseprint("\njobs_admin:", query)
    result = self.do_json_request('3/Jobs.json', timeout=timeoutSecs, params=query)
    verboseprint("\njobs_admin result:", dump_json(result))
    # print "WARNING: faking jobs admin"
    # a = { 'jobs': {} }
    return result
示例#32
0
def import_files(self, path, timeoutSecs=180):
    """Import a file or directory of files into h2o (old /2 REST api).

    'path' accepts a directory or a single file, e.g.
    192.168.0.37:54323/ImportFiles.html?file=%2Fhome%2F0xdiag%2Fdatasets
    """
    response = self.do_json_request(
        '2/ImportFiles.json',
        timeout=timeoutSecs,
        params={"path": path})
    verboseprint("\nimport_files result:", dump_json(response))
    return response
示例#33
0
文件: h2o_ray.py 项目: patchlog/h2o-3
def import_files(self, path, timeoutSecs=180):
    """Import a file or directory of files into h2o via the v3 REST api.

    'path' accepts a directory or a single file, e.g.
    192.168.0.37:54323/ImportFiles.html?file=%2Fhome%2F0xdiag%2Fdatasets
    Scans the sandbox logs for errors before returning.
    """
    result = self.do_json_request(
        '3/ImportFiles.json', timeout=timeoutSecs, params={"path": path})
    verboseprint("\nimport_files result:", dump_json(result))
    h2o_sandbox.check_sandbox_for_errors()
    return result
示例#34
0
def jobs_admin(self, timeoutSecs=120, **kwargs):
    """Fetch the job list from the (unversioned) Jobs endpoint.

    kwargs are forwarded verbatim as request params.
    """
    request_params = dict(kwargs)
    verboseprint("\njobs_admin:", request_params)
    response = self.do_json_request('Jobs.json', timeout=timeoutSecs, params=request_params)
    verboseprint("\njobs_admin result:", dump_json(response))
    # print "WARNING: faking jobs admin"
    # a = { 'jobs': {} }
    return response
示例#35
0
    def upload_file(self, f, progress=None):
        # FIX! we won't find it here if it's hdfs://172.16.2.151/ file
        f = find_file(f)
        if f not in self.uploaded:
            start = time.time()
            import md5

            m = md5.new()
            m.update(open(f).read())
            m.update(getpass.getuser())
            dest = '/tmp/' + m.hexdigest() + "-" + os.path.basename(f)

            # sigh. we rm/create sandbox in build_cloud now
            # (because nosetests doesn't exec h2o_main and we
            # don't want to code "clean_sandbox()" in all the tests.
            # So: we don't have a sandbox here, or if we do, we're going to delete it.
            # Just don't log anything until build_cloud()? that should be okay?
            # we were just logging this upload message..not needed.
            # log('Uploading to %s: %s -> %s' % (self.http_addr, f, dest))
            sftp = self.ssh.open_sftp()
            # check if file exists on remote side
            # does paramiko have issues with big files? (>1GB, or 650MB?). maybe we don't care.
            # This would arise (as mentioned in the source, line no 667, 
            # http://www.lag.net/paramiko/docs/paramiko.sftp_client-pysrc.html) when there is 
            # any error reading the packet or when there is EOFError

            # but I'm getting sftp close here randomly at sm.
            # http://stackoverflow.com/questions/22708942/python-paramiko-module-error-with-callback
            # http://stackoverflow.com/questions/15010540/paramiko-sftp-server-connection-dropped
            # http://stackoverflow.com/questions/12322210/handling-paramiko-sshexception-server-connection-dropped
            try:
                # note we don't do a md5 compare. so if a corrupted file was uploaded we won't re-upload 
                # until we do another build.
                sftp.stat(dest)
                print "{0} Skipping upload of file {1}. File {2} exists on remote side!".format(self, f, dest)
            except IOError, e:
                # if self.channel.closed or self.channel.exit_status_ready():
                #     raise Exception("something bad happened to our %s being used for sftp. keepalive? %s %s" % \
                #         (self, self.channel.closed, self.channel.exit_status_ready()))

                if e.errno == errno.ENOENT: # no such file or directory
                    verboseprint("{0} uploading file {1}".format(self, f))
                    sftp.put(f, dest, callback=progress)
                    # if you want to track upload times
                    ### print "\n{0:.3f} seconds".format(time.time() - start)
                elif e.errno == errno.EEXIST: # File Exists
                    pass
                else:
                    print "Got unexpected errno: %s on paramiko sftp." % e.errno
                    print "Lookup here: https://docs.python.org/2/library/errno.html"
                    # throw the exception again, if not what we expected
                    exc_info = sys.exc_info()
                    raise exc_info[1], None, exc_info[2]
            finally:
示例#36
0
文件: h2o_import.py 项目: 100star/h2o
def find_key(pattern=None):
    """Return the first store-view key matching pattern, or None.

    If several keys match, the first one is used (a warning goes to
    verboseprint).
    """
    # fix: dropped unused local 'found' that was assigned but never read
    kwargs = {'filter': pattern}
    storeViewResult = h2o_nodes.nodes[0].store_view(**kwargs)
    keys = storeViewResult['keys']
    if len(keys) == 0:
        return None

    if len(keys) > 1:
        verboseprint("Warning: multiple imported keys match the key pattern given, Using: %s" % keys[0]['key'])

    return keys[0]['key']
示例#37
0
文件: h2o_get_ip.py 项目: kpmui4/h2o
def get_ip_address(ipFromCmdLine=None):
    """Pick the IPv4 address python/h2o should use on this host.

    Resolution order:
      1. an explicit ip passed on the command line,
      2. the source address of a UDP socket "connected" to 8.8.8.8,
      3. getaddrinfo(hostname) if case 2 still yielded loopback.
    A best-effort gethostbyname_ex() pass then warns if a different
    non-loopback address exists (e.g. when a VPN is active).
    """
    if ipFromCmdLine:
        verboseprint("get_ip case 1:", ipFromCmdLine)
        return ipFromCmdLine

    ip = '127.0.0.1'
    # short timeout so slow DNS below can't stall the whole test run
    socket.setdefaulttimeout(0.5)
    hostname = socket.gethostname()
    # this method doesn't work if vpn is enabled..it gets the vpn ip
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        # UDP connect doesn't send packets; it just binds a local address
        s.connect(('8.8.8.8', 0))
        ip = s.getsockname()[0]
        verboseprint("get_ip case 2:", ip)
    except:
        pass

    if ip.startswith('127'):
        # drills down into family
        ip = socket.getaddrinfo(hostname, None)[0][4][0]
        verboseprint("get_ip case 3:", ip)

    ipa = None
    # we had some hosts that didn't support gethostbyname_ex().
    # hopefully we don't need a hack to exclude
    # the gethostbyname_ex can be slow. the timeout above will save us quickly
    try:
        # Translate a host name to IPv4 address format, extended interface.
        # This should be resolve by dns so it's the right ip for talking to this guy?
        # Return a triple (hostname, aliaslist, ipaddrlist)
        # where hostname is the primary host name responding to the given ip_address,
        # aliaslist is a (possibly empty) list of alternative host names for the same address,
        # ipaddrlist is a list of IPv4 addresses for the same interface on the same host
        ghbx = socket.gethostbyname_ex(hostname)
        for ips in ghbx[2]:
            # only take the first
            if ipa is None and not ips.startswith("127."):
                ipa = ips[:]
                verboseprint("get_ip case 4:", ipa)
                if ip != ipa:
                    print "\nAssuming", ip, "is the ip address h2o will use but", ipa,\
                        "is probably the real ip?"
                    print "You might have a vpn active. Best to use '-ip", ipa,\
                        "'to get python and h2o the same."
    except:
        pass
        # print "Timeout during socket.gethostbyname_ex(hostname)"

    verboseprint("get_ip_address:", ip)
    # set it back to default higher timeout (None would be no timeout?)
    socket.setdefaulttimeout(5)
    return ip
示例#38
0
def exec_expr(node=None, execExpr=None, resultKey=None, timeoutSecs=10, ignoreH2oError=False):
    """Run a Rapids ast on a node and return (resultExec, result).

    result depends on what H2O sent back:
      - 'cols' present: the 'min' of the first column (mainly useful for a
        1x1 frame result),
      - else 'funstr' present: the function string,
      - otherwise: the 'scalar' value.
    When resultKey is given and a frame came back, a second exec is issued on
    resultKey and resultExec is rebound to that second response before the
    column min is extracted.
    """
    if not node:
        node = h2o_nodes.nodes[0]
    kwargs = {'ast': execExpr} 
    start = time.time()
    resultExec = h2o_cmd.runExec(node, timeoutSecs=timeoutSecs, ignoreH2oError=ignoreH2oError, **kwargs)
    verboseprint('exec took', time.time() - start, 'seconds')
    print "exec:", dump_json(resultExec)

    # when do I get cols?

    # "result": "1.0351050710011848E-300", 
    # "scalar": 1.0351050710011848e-300, 
    # "funstr": null, 

    # "key": null, 
    # "col_names": null, 
    # "num_cols": 0, 
    # "num_rows": 0, 

    # "exception": null, 

    # echoing?
    # "string": null
    # "funs": null, 
    # "ast": "(= !x (xorsum ([ $r1 \"null\" #0) $TRUE))", 

    if 'cols' in resultExec and resultExec['cols']: # not null
        if 'funstr' in resultExec and resultExec['funstr']: # not null
            raise Exception("cols and funstr shouldn't both be in resultExec: %s" % dump_json(resultExec))
        else:
            print "Frame return"
            # if test said to look at a resultKey, it's should be in h2o k/v store
            # inspect a result key?
            # Should we get the key name from the exec return?
            if resultKey is not None:
                kwargs = {'ast': resultKey} 
                resultExec = h2o_cmd.runExec(node, timeoutSecs=timeoutSecs, ignoreH2oError=ignoreH2oError, **kwargs)
                print "exec key result:", dump_json(resultExec)

            # handles the 1x1 data frame result. Not really interesting if bigger than 1x1?
            result = resultExec['cols'][0]['min']
        
    else: 
        if 'funstr' in resultExec and resultExec['funstr']: # not null
            print "function return"
            result = resultExec['funstr']
        else:
            print "scalar return"
            result = resultExec['scalar']
            
    return resultExec, result
示例#39
0
def wait_for_live_port(ip, port, retries=3):
    """Block until ip:port answers `retries` consecutive liveness probes.

    One probe per second; raises when the port is still dead afterwards.
    """
    verboseprint("Waiting for {0}:{1} {2}times...".format(ip, port, retries))
    if not port_live(ip, port):
        consecutive = 0
        while consecutive < retries:
            # a dead probe resets the consecutive-success counter
            consecutive = consecutive + 1 if port_live(ip, port) else 0
            time.sleep(1)
            dot()
    if not port_live(ip, port):
        raise Exception("[h2o_cmd] Error waiting for {0}:{1} {2}times...".format(ip, port, retries))
示例#40
0
    def test_simple2(self):
        # h2o-dev doesn't take ../.. type paths? make find_file return absolute path
        a_node = h2o.nodes[0]

        import_result = a_node.import_files(path=find_file("smalldata/logreg/prostate.csv"))
        print dump_json(import_result)

        frames = a_node.frames(key=import_result['keys'][0], len=5)['frames']
        print dump_json(frames)

        parse_result = a_node.parse(key=import_result['keys'][0])
        hex_key = parse_result['frames'][0]['key']['name']
        verboseprint(hex_key, ":", dump_json(parse_result))
示例#41
0
    def wait_for_node_to_accept_connections(self, nodeList, timeoutSecs=15, noExtraErrorCheck=False):
        verboseprint("wait_for_node_to_accept_connections")

        def test(n, tries=None, timeoutSecs=timeoutSecs):
            try:
                n.get_cloud(noExtraErrorCheck=noExtraErrorCheck, timeoutSecs=timeoutSecs)
                return True
            except requests.ConnectionError, e:
                # Now using: requests 1.1.0 (easy_install --upgrade requests) 2/5/13
                # Now: assume all requests.ConnectionErrors are H2O legal connection errors.
                # Have trouble finding where the errno is, fine to assume all are good ones.
                # Timeout check will kick in if continued H2O badness.
                return False
示例#42
0
def quantiles(self, timeoutSecs=300, print_params=True, **kwargs):
    """Run the (old-API) Quantiles endpoint and return its json response."""
    # legal request parameters; kwargs are validated against these
    legal = {
        'source_key': None,
        'column': None,
        'quantile': None,
        'max_qbins': None,
        'interpolation_type': None,
        'multiple_pass': None,
    }
    check_params_update_kwargs(legal, kwargs, 'quantiles', print_params)
    resp = self.do_json_request('Quantiles.json', timeout=timeoutSecs, params=legal)
    verboseprint("\nquantiles result:", dump_json(resp))
    return resp
示例#43
0
def quantiles(self, timeoutSecs=300, print_params=True, **kwargs):
    """Hit the v3 Quantiles endpoint; scans the sandbox for errors after."""
    # legal request parameters for the v3 api
    params_dict = dict(
        destination_key=None,
        training_frame=None,
        validation_frame=None,
        ignored_columns=None,
        score_each_iteration=None,
        probs=None,
    )
    check_params_update_kwargs(params_dict, kwargs, 'quantiles', print_params)
    result = self.do_json_request('3/Quantiles.json', timeout=timeoutSecs, params=params_dict)
    verboseprint("\nquantiles result:", dump_json(result))
    h2o_sandbox.check_sandbox_for_errors()
    return result
示例#44
0
def find_key(pattern=None):
    """Look up the first key in node 0's store view matching pattern.

    Returns None when nothing matches; on multiple matches the first is
    used and a warning is emitted via verboseprint.
    """
    # fix: removed the unused local 'found = None' (assigned, never read)
    kwargs = {'filter': pattern}
    storeViewResult = h2o_nodes.nodes[0].store_view(**kwargs)
    keys = storeViewResult['keys']
    if len(keys) == 0:
        return None

    if len(keys) > 1:
        verboseprint(
            "Warning: multiple imported keys match the key pattern given, Using: %s"
            % keys[0]['key'])

    return keys[0]['key']
示例#45
0
def quantiles(self, timeoutSecs=300, print_params=True, **kwargs):
    """Query Quantiles.json with the v3-style parameter set; checks the
    sandbox logs for errors before returning."""
    # build the legal-parameter template, all defaulting to None
    allowed = {}
    for name in ('destination_key', 'training_frame', 'validation_frame',
                 'ignored_columns', 'score_each_iteration', 'probs'):
        allowed[name] = None
    check_params_update_kwargs(allowed, kwargs, 'quantiles', print_params)
    a = self.do_json_request('Quantiles.json', timeout=timeoutSecs, params=allowed)
    verboseprint("\nquantiles result:", dump_json(a))
    h2o_sandbox.check_sandbox_for_errors()
    return a
示例#46
0
def delete_model(self, key, ignoreMissingKey=True, timeoutSecs=60, **kwargs):
    '''
    Delete a model on the h2o cluster, given its key.
    '''
    assert key is not None, '"key" parameter is null'

    response = self.do_json_request('/3/Models.json/' + key, cmd='delete', timeout=timeoutSecs)

    # TODO: what marker does the REST api actually return for a missing key?
    missing = 'f00b4r' in response
    if missing and not ignoreMissingKey:
        raise ValueError('Model key not found: ' + key)

    verboseprint("delete_model result:", dump_json(response))
    return response
示例#47
0
def delete_model(self, key, ignoreMissingKey=True, timeoutSecs=60, **kwargs):
    """Remove the model identified by key from the h2o cluster.

    Raises ValueError when the key looks missing and ignoreMissingKey is
    False; otherwise returns the REST response.
    """
    assert key is not None, '"key" parameter is null'

    rv = self.do_json_request('/3/Models.json/' + key, cmd='delete', timeout=timeoutSecs)

    # TODO: look for what?
    if not ignoreMissingKey:
        if 'f00b4r' in rv:
            raise ValueError('Model key not found: ' + key)

    verboseprint("delete_model result:", dump_json(rv))
    return rv
示例#48
0
 def changeTokens(self, rows, tokenCase, tokenChangeDict):
     """Wrap each field of every data row in the open/close token pair for
     tokenCase; comment lines (leading #, possibly indented) and empty lines
     pass through untouched. Returns the new row list.
     """
     cOpen, cClose = tokenChangeDict[tokenCase]
     out = []
     for row in rows:
         # don't quote lines that start with #
         # can quote lines start with some spaces or tabs? maybe
         isComment = re.match(r'^[ \t]*#', row)
         isEmpty = re.match(r'^$', row)
         if not isComment and not isEmpty:
             row = re.sub('^', cOpen, row)
             row = re.sub('\|', cClose + '|' + cOpen, row)
             row = re.sub('$', cClose, row)
         verboseprint(row)
         out.append(row)
     return out
示例#49
0
    def __init__(self, nodeState):
        """Build an attribute-per-field object from a node-state dict."""
        # hack: the json is currently created with the strings "None"/"false"/
        # "true" for what should be None/False/True; translate those back.
        # "None" shouldn't exist for any other reason; "null" is left as-is
        # (string) for now.
        remap = {"None": None, "false": False, "true": True}
        for attr, value in nodeState.iteritems():
            verboseprint("init:", attr, value)
            if isinstance(value, basestring):
                value = remap.get(value, value)
            setattr(self, attr, value)  # achieves self.attr = value
示例#50
0
 def changeTokens(self, rows, tokenCase, tokenChangeDict):
     """Quote every field of each row using the delimiter pair selected by
     tokenCase from tokenChangeDict; comments and blank lines are copied
     through unchanged."""
     [openTok, closeTok] = tokenChangeDict[tokenCase]
     result = []
     for line in rows:
         # comment lines (leading #, maybe after spaces/tabs) and empty
         # lines are not quoted
         if re.match(r'^[ \t]*#', line) or re.match(r'^$', line):
             quoted = line
         else:
             quoted = re.sub('^', openTok, line)
             quoted = re.sub('\|', closeTok + '|' + openTok, quoted)
             quoted = re.sub('$', closeTok, quoted)
         verboseprint(quoted)
         result.append(quoted)
     return result
示例#51
0
    def test(n, tries=None, timeoutSecs=14.0):
        """One cloud-build polling attempt against node n.

        Returns True once the (off-by-one adjusted) cloud_size equals
        node_count and consensus is reached. Raises when the cloud reports
        MORE nodes than expected (probable zombies on the network).
        NOTE(review): node_count comes from the enclosing scope.
        """
        # fix: removed stray debug output (print "kevin")
        c = n.get_cloud(noSandboxErrorCheck=True, timeoutSecs=timeoutSecs)
        # don't want to check everything. But this will check that the keys are returned!
        consensus = c['consensus']
        locked = c['locked']

        # FIX! assume h2o-dev is off by 1
        cloud_size = c['cloud_size'] + 1

        cloud_name = c['cloud_name']

        if 'nodes' not in c:
            emsg = "\nH2O didn't include a list of nodes in get_cloud response after initial cloud build"
            raise Exception(emsg)

        # only print it when you get consensus
        if cloud_size != node_count:
            verboseprint("\nNodes in cloud while building:")
            for i,ci in enumerate(c['nodes']):
                verboseprint(i, ci['h2o']['node'])

        if (cloud_size > node_count):
            emsg = (
                "\n\nERROR: cloud_size: %d reported via json is bigger than we expect: %d" % \
                    (cloud_size, node_count) +
                "\nLikely have zombie(s) with the same cloud name on the network." +
                "\nLook at the cloud IP's in 'grep Paxos sandbox/*stdout*' for some IP's you didn't expect." +
                "\n\nYou probably don't have to do anything, as the cloud shutdown in this test should" +
                "\nhave sent a Shutdown.json to all in that cloud (you'll see a kill -2 in the *stdout*)." +
                "\nIf you try again, and it still fails, go to those IPs and kill the zombie h2o's." +
                "\nIf you think you really have an intermittent cloud build, report it." +
                "\n" +
                "\nbuilding cloud size of 2 with 127.0.0.1 may temporarily report 3 incorrectly," +
                "\nwith no zombie?"
            )
            for ci in c['nodes']:
                emsg += "\n" + ci['h2o']['node']
            raise Exception(emsg)

        a = (cloud_size == node_count) and consensus
        if a:
            verboseprint("\tLocked won't happen until after keys are written")
            verboseprint("\nNodes in final cloud:")
            for ci in c['nodes']:
                verboseprint(ci['h2o']['node'])

        return a
示例#52
0
def interaction(self, timeoutSecs=120, noPoll=False, **kwargs):
    """POST an Interaction job; poll it to completion unless noPoll.

    With noPoll=True the initial response is returned immediately (after a
    sandbox error scan).
    """
    # FIX! have to add legal params
    params_dict = {}
    check_params_update_kwargs(params_dict, kwargs, 'interaction', print_params=True)
    firstResult = self.do_json_request('3/Interaction.json', cmd='post', timeout=timeoutSecs, params=params_dict)
    job_key = firstResult['dest']['name']

    if noPoll:
        h2o_sandbox.check_sandbox_for_errors()
        return firstResult

    finalResult = self.poll_job(job_key)
    verboseprint("interaction result:", dump_json(finalResult))
    return finalResult
示例#53
0
def put_file(self, f, key=None, timeoutSecs=60):
    """Upload a local file to h2o via PostFile and return the destination key.

    key defaults to the file's basename.
    """
    if key is None:
        key = os.path.basename(f)
        ### print "putfile specifying this key:", key

    fileObj = open(f, 'rb')
    # fix: close the file handle even when the request raises (the original
    # leaked it on any do_json_request failure)
    try:
        resp = self.do_json_request('3/PostFile.json',
                                    cmd='post',
                                    timeout=timeoutSecs,
                                    params={"destination_key": key},
                                    files={"file": fileObj},
                                    extraComment=str(f))
        verboseprint("\nput_file response: ", dump_json(resp))
    finally:
        fileObj.close()
    return key
示例#54
0
    def test(n, tries=None, timeoutSecs=14.0):
        """One cloud-build polling attempt against node n.

        Returns True when the reported cloud_size equals node_count and
        consensus has been reached; raises when the cloud is BIGGER than
        expected (likely zombie nodes sharing the cloud name).
        NOTE(review): node_count comes from the enclosing scope.
        """
        c = n.get_cloud(noExtraErrorCheck=True, timeoutSecs=timeoutSecs)
        # don't want to check everything. But this will check that the keys are returned!
        consensus = c['consensus']
        locked = c['locked']
        cloud_size = c['cloud_size']
        cloud_name = c['cloud_name']
        node_name = c['node_name']

        if 'nodes' not in c:
            emsg = "\nH2O didn't include a list of nodes in get_cloud response after initial cloud build"
            raise Exception(emsg)

        # only print it when you get consensus
        if cloud_size != node_count:
            verboseprint("\nNodes in cloud while building:")
            for ci in c['nodes']:
                verboseprint(ci['name'])

        if cloud_size > node_count:
            emsg = (
                "\n\nERROR: cloud_size: %d reported via json is bigger than we expect: %d" % \
                    (cloud_size, node_count) +
                "\nLikely have zombie(s) with the same cloud name on the network." +
                "\nLook at the cloud IP's in 'grep Paxos sandbox/*stdout*' for some IP's you didn't expect." +
                "\n\nYou probably don't have to do anything, as the cloud shutdown in this test should" +
                "\nhave sent a Shutdown.json to all in that cloud (you'll see a kill -2 in the *stdout*)." +
                "\nIf you try again, and it still fails, go to those IPs and kill the zombie h2o's." +
                "\nIf you think you really have an intermittent cloud build, report it." +
                "\n" +
                "\nbuilding cloud size of 2 with 127.0.0.1 may temporarily report 3 incorrectly," +
                "\nwith no zombie?"
            )
            for ci in c['nodes']:
                emsg += "\n" + ci['name']
            raise Exception(emsg)

        a = (cloud_size == node_count) and consensus
        if a:
            verboseprint("\tLocked won't happen until after keys are written")
            verboseprint("\nNodes in final cloud:")
            for ci in c['nodes']:
                verboseprint(ci['name'])

        return a