def exec_list(exprList, lenNodes, csvFilename, hex_key, colX):
    h2e.exec_zero_list(zeroList)
    # start with trial = 1 because trial-1 is used to point to Result0 which must be initted
    trial = 1
    while (trial < 100):
        for exprTemplate in exprList:
            # do each expression at a random node, to facilate key movement
            nodeX = random.randint(0, lenNodes - 1)
            # billion rows only has two cols
            # colX is incremented in the fill_in_expr_template

            # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
            row = str(random.randint(1, 400000))

            execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial,
                                                 row, hex_key)
            execResultInspect = h2e.exec_expr(h2o.nodes[nodeX],
                                              execExpr,
                                              resultKey="Result" + str(trial) +
                                              ".hex",
                                              timeoutSecs=60)

            h2o.check_sandbox_for_errors()
            print "Trial #", trial, "completed\n"
            trial += 1
def exec_list(exprList, lenNodes, csvFilename, key2):
        h2e.exec_zero_list(zeroList)
        # start with trial = 1 because trial-1 is used to point to Result0 which must be initted
        trial = 1
        while (trial < 100):
            for exprTemplate in exprList:
                # do each expression at a random node, to facilate key movement
                nodeX = random.randint(0,lenNodes-1)
                colX = random.randint(1,54)
                # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
                row = str(random.randint(1,400000))

                execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial, row, key2)
                execResultInspect = h2e.exec_expr(h2o.nodes[nodeX], execExpr, 
                    resultKey="Result"+str(trial)+".hex", timeoutSecs=60)

                eri0 = execResultInspect[0]
                eri1 = execResultInspect[1]
                columns = eri0.pop('cols')
                columnsDict = columns[0]
                print "\nexecResult columns[0]:", h2o.dump_json(columnsDict)
                print "\nexecResult [0]:", h2o.dump_json(eri0)
                print "\nexecResult [1] :", h2o.dump_json(eri1)
                
                min = columnsDict["min"]
                h2o.verboseprint("min: ", min, "trial:", trial)
                ### self.assertEqual(float(min), float(trial),"what can we check here")

                ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
                # slows things down to check every iteration, but good for isolation
                h2o.check_sandbox_for_errors()
                print "Trial #", trial, "completed\n"
                trial += 1
示例#3
0
    def test_exec_filter_slice2(self):
        timeoutSecs = 10
        csvFilename = "covtype.data"
        csvPathname = h2o.find_dataset('UCI/UCI-large/covtype/covtype.data')
        key2 = "c"
        parseKey = h2o_cmd.parseFile(None, csvPathname, 'covtype.data', 'c',
                                     10)
        print csvFilename, 'parse time:', parseKey['response']['time']
        print "Parse result['desination_key']:", parseKey['destination_key']
        inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])

        for trial in range(10):
            print "Doing the execs in order, to feed filters into slices"
            nodeX = 0
            for exprTemplate in exprList:
                execExpr = h2e.fill_in_expr_template(exprTemplate,
                                                     colX=0,
                                                     n=0,
                                                     row=1,
                                                     key2=key2,
                                                     m=2)
                time.sleep(2)
                h2o.check_sandbox_for_errors()

                execResultInspect, min_value = h2e.exec_expr(
                    h2o.nodes[nodeX],
                    execExpr,
                    resultKey="Result.hex",
                    timeoutSecs=4)
                print "min_value:", min_value, "execExpr:", execExpr
                h2o.verboseprint("min: ", min_value, "trial:", trial)
示例#4
0
    def test_exec2_filter_slice(self):
        timeoutSecs = 10
        csvFilename = "covtype.data"
        csvPathname = 'standard/covtype.data'
        hex_key = "c.hex"
        parseResult = h2i.import_parse(bucket='home-0xdiag-datasets',
                                       path=csvPathname,
                                       schema='put',
                                       hex_key=hex_key,
                                       timeoutSecs=20)
        print "Parse result['desination_key']:", parseResult['destination_key']
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])

        for trial in range(10):
            print "Doing the execs in order, to feed filters into slices"
            nodeX = 0
            for exprTemplate in exprList:
                execExpr = h2e.fill_in_expr_template(exprTemplate,
                                                     colX=0,
                                                     n=0,
                                                     row=1,
                                                     keyX=hex_key,
                                                     m=2)
                execResultInspect, min_value = h2e.exec_expr(h2o.nodes[nodeX],
                                                             execExpr,
                                                             resultKey=None,
                                                             timeoutSecs=10)

                print "min_value:", min_value, "execExpr:", execExpr
                h2o.verboseprint("min: ", min_value, "trial:", trial)
示例#5
0
    def test_exec_filter_slice2(self):
        timeoutSecs = 10
        csvFilename = "covtype.data"
        csvPathname = 'UCI/UCI-large/covtype/covtype.data'
        hex_key = 'c'

        parseResult = h2i.import_parse(bucket='datasets', path=csvPathname, schema='put', hex_key=hex_key, 
            timeoutSecs=10)

        print csvFilename, 'parse time:', parseResult['response']['time']
        print "Parse result['desination_key']:", parseResult['destination_key']
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'])

        for trial in range(10):
            print "Doing the execs in order, to feed filters into slices"
            nodeX = 0
            for exprTemplate in exprList:
                execExpr = h2e.fill_in_expr_template(exprTemplate, colX=0, n=0, row=1, keyX=hex_key, m=2)
                time.sleep(2)
                h2o.check_sandbox_for_errors()

                execResultInspect, min_value = h2e.exec_expr(h2o.nodes[nodeX], execExpr, 
                    resultKey="Result.hex", timeoutSecs=4)
                print "min_value:", min_value, "execExpr:", execExpr
                h2o.verboseprint("min: ", min_value, "trial:", trial)
示例#6
0
    def test_exec2_rotate_inc(self):
        h2o.beta_features = True

        lenNodes = len(h2o.nodes)
        # zero the list of Results using node[0]
        # FIX! is the zerolist not eing seen correctl? is it not initializing to non-zero?
        for exprTemplate in initList:
            execExpr = h2e.fill_in_expr_template(exprTemplate, n=0, m=0)
            print execExpr
            execResult = h2e.exec_expr(h2o.nodes[0], execExpr)
            ### print "\nexecResult:", execResult

        period = 10
        # start at result10, to allow goback of 10
        trial = 0
        while (trial < 200):
            for exprTemplate in exprList:
                # for the first 100 trials: do each expression at node 0,
                # for the second 100 trials: do each expression at a random node, to facilate key movement
                # FIX! there's some problem with the initList not taking if rotated amongst nodes?
                if (DO_ONE_NODE_ONLY or trial < 100):
                    nodeX = 0
                else:
                    nodeX = random.randint(0,lenNodes-1)
                ### print nodeX
                
                number = trial + 10
                resultKey="Result" + str(number%period)
                execExpr = h2e.fill_in_expr_template(exprTemplate, n=(number%period), m=((number-goback)%period))
                execResultInspect, min_value = h2e.exec_expr(h2o.nodes[nodeX], execExpr,
                    resultKey=None, timeoutSecs=4)

                print "min_value:", min_value, "execExpr:", execExpr, "number:", number
                h2o.verboseprint("min: ", min_value, "trial:", trial)
                self.assertEqual(int(min_value), int(number))
                # we're talking to just one node. ignore this comment
                #    'Although the memory model allows write atomicity to be violated,' +
                #    'this test was passing with an assumption of multi-jvm write atomicity' + 
                #    'Be interesting if ever fails. Can disable assertion if so, and run without check')
#
                ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
                trial += 1
示例#7
0
    def test_exec2_rotate_inc(self):

        lenNodes = len(h2o.nodes)
        # zero the list of Results using node[0]
        # FIX! is the zerolist not eing seen correctl? is it not initializing to non-zero?
        for exprTemplate in initList:
            execExpr = h2e.fill_in_expr_template(exprTemplate, n=0, m=0)
            print execExpr
            execResult = h2e.exec_expr(h2o.nodes[0], execExpr)
            ### print "\nexecResult:", execResult

        period = 10
        # start at result10, to allow goback of 10
        trial = 0
        while (trial < 200):
            for exprTemplate in exprList:
                # for the first 100 trials: do each expression at node 0,
                # for the second 100 trials: do each expression at a random node, to facilate key movement
                # FIX! there's some problem with the initList not taking if rotated amongst nodes?
                if (DO_ONE_NODE_ONLY or trial < 100):
                    nodeX = 0
                else:
                    nodeX = random.randint(0,lenNodes-1)
                ### print nodeX
                
                number = trial + 10
                resultKey="Result" + str(number%period)
                execExpr = h2e.fill_in_expr_template(exprTemplate, n=(number%period), m=((number-goback)%period))
                execResultInspect, min_value = h2e.exec_expr(h2o.nodes[nodeX], execExpr,
                    resultKey=None, timeoutSecs=4)

                print "min_value:", min_value, "execExpr:", execExpr, "number:", number
                h2o.verboseprint("min: ", min_value, "trial:", trial)
                self.assertEqual(int(min_value), int(number))
                # we're talking to just one node. ignore this comment
                #    'Although the memory model allows write atomicity to be violated,' +
                #    'this test was passing with an assumption of multi-jvm write atomicity' + 
                #    'Be interesting if ever fails. Can disable assertion if so, and run without check')
#
                ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
                trial += 1
def exec_list(exprList, lenNodes, csvFilename, hex_key):
        h2e.exec_zero_list(zeroList)
        # start with trial = 1 because trial-1 is used to point to Result0 which must be initted
        trial = 1
        while (trial < 100):
            for exprTemplate in exprList:
                # do each expression at a random node, to facilate key movement
                nodeX = random.randint(0,lenNodes-1)
                colX = random.randint(1,54)
                # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
                row = str(random.randint(1,400000))

                execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial, row, hex_key)
                execResultInspect = h2e.exec_expr(h2o.nodes[nodeX], execExpr, 
                    resultKey="Result"+str(trial)+".hex", timeoutSecs=60)

                h2o.check_sandbox_for_errors()
                print "Trial #", trial, "completed\n"
                trial += 1
示例#9
0
    def test_exec2_filter_slice(self):
        h2o.beta_features = True
        timeoutSecs = 10
        csvFilename = "covtype.data"
        csvPathname = "standard/covtype.data"
        hex_key = "c.hex"
        parseResult = h2i.import_parse(
            bucket="home-0xdiag-datasets", path=csvPathname, schema="put", hex_key=hex_key, timeoutSecs=10
        )
        print "Parse result['desination_key']:", parseResult["destination_key"]
        inspect = h2o_cmd.runInspect(None, parseResult["destination_key"])

        for trial in range(10):
            print "Doing the execs in order, to feed filters into slices"
            nodeX = 0
            for exprTemplate in exprList:
                execExpr = h2e.fill_in_expr_template(exprTemplate, colX=0, n=0, row=1, keyX=hex_key, m=2)
                execResultInspect, min_value = h2e.exec_expr(h2o.nodes[nodeX], execExpr, resultKey=None, timeoutSecs=4)

                print "min_value:", min_value, "execExpr:", execExpr
                h2o.verboseprint("min: ", min_value, "trial:", trial)
示例#10
0
    def test_exec_filter_slice(self):
        timeoutSecs = 10
        csvFilename = "covtype.data"
        csvPathname = h2o.find_dataset('UCI/UCI-large/covtype/covtype.data')
        key2 = "c"
        parseKey = h2o_cmd.parseFile(None, csvPathname, 'covtype.data', 'c', 10)
        print csvFilename, 'parse time:', parseKey['response']['time']
        print "Parse result['desination_key']:", parseKey['destination_key']
        inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])

        for trial in range(10):
            print "Doing the execs in order, to feed filters into slices"
            nodeX = 0
            for exprTemplate in exprList:
                execExpr = h2e.fill_in_expr_template(exprTemplate, colX=0, n=0, row=1, key2=key2, m=2)

                execResultInspect, min_value = h2e.exec_expr(h2o.nodes[nodeX], execExpr, 
                    resultKey="Result.hex", timeoutSecs=4)

                print "min_value:", min_value, "execExpr:", execExpr
                h2o.verboseprint("min: ", min_value, "trial:", trial)
def exec_list(exprList, lenNodes, csvFilename, key2):
    h2e.exec_zero_list(zeroList)
    # start with trial = 1 because trial-1 is used to point to Result0 which must be initted
    trial = 1
    while (trial < 100):
        for exprTemplate in exprList:
            # do each expression at a random node, to facilate key movement
            nodeX = random.randint(0, lenNodes - 1)
            colX = random.randint(1, 54)
            # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
            row = str(random.randint(1, 400000))

            execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial,
                                                 row, key2)
            execResultInspect = h2e.exec_expr(h2o.nodes[nodeX],
                                              execExpr,
                                              resultKey="Result" + str(trial) +
                                              ".hex",
                                              timeoutSecs=60)

            eri0 = execResultInspect[0]
            eri1 = execResultInspect[1]
            columns = eri0.pop('cols')
            columnsDict = columns[0]
            print "\nexecResult columns[0]:", h2o.dump_json(columnsDict)
            print "\nexecResult [0]:", h2o.dump_json(eri0)
            print "\nexecResult [1] :", h2o.dump_json(eri1)

            min = columnsDict["min"]
            h2o.verboseprint("min: ", min, "trial:", trial)
            ### self.assertEqual(float(min), float(trial),"what can we check here")

            ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
            # slows things down to check every iteration, but good for isolation
            h2o.check_sandbox_for_errors()
            print "Trial #", trial, "completed\n"
            trial += 1