Exemplo n.º 1
0
    def test_exec2_xorsum(self):
        h2o.beta_features = True
        SYNDATASETS_DIR = h2o.make_syn_dir()

        tryList = [
            (10000, 1, 'r1', 0, 10, None),
        ]

        ullResultList = []
        for (rowCount, colCount, hex_key, expectedMin, expectedMax, expected) in tryList:
            SEEDPERFILE = random.randint(0, sys.maxint)
            # dynamic range of the data may be useful for estimating error
            maxDelta = expectedMax - expectedMin

            csvFilename = 'syn_real_' + str(rowCount) + 'x' + str(colCount) + '.csv'
            csvPathname = SYNDATASETS_DIR + '/' + csvFilename
            csvPathnameFull = h2i.find_folder_and_filename(None, csvPathname, returnFullPath=True)
            print "Creating random", csvPathname
            (expectedUll, expectedFpSum)  = write_syn_dataset(csvPathname, 
                rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE)

            parseResult = h2i.import_parse(path=csvPathname, schema='local', hex_key=hex_key, 
                timeoutSecs=3000, retryDelaySecs=2)
            inspect = h2o_cmd.runInspect(key=hex_key)
            print "numRows:", inspect['numRows']
            print "numCols:", inspect['numCols']
            inspect = h2o_cmd.runInspect(key=hex_key, offset=-1)
            print "inspect offset = -1:", h2o.dump_json(inspect)

            
            # looking at the 8 bytes of bits for the h2o doubles
            # xorsum will zero out the sign and exponent
            for execExpr in exprList:
                start = time.time()
                (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0], execExpr, 
                    resultKey=None, timeoutSecs=300)
                print 'exec took', time.time() - start, 'seconds'
                print "execResult:", h2o.dump_json(execResult)
                print ""
                print "%30s" % "fpResult:", "%.15f" % fpResult
                ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                print "%30s" % "bitResult (0.16x):", "0x%0.16x" % ullResult
                print "%30s" % "expectedUll (0.16x):", "0x%0.16x" % expectedUll
                # print "%30s" % "hex(bitResult):", hex(ullResult)
                ullResultList.append((ullResult, fpResult))

            h2o.check_sandbox_for_errors()

            print "first result was from a sum. others are xorsum"
            print "ullResultList:"
            for ullResult, fpResult in ullResultList:
                print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)
            expectedUllAsDouble = h2o_util.unsignedLongLongToDouble(expectedUll)
            print "%30s" % "expectedUll (0.16x):", "0x%0.16x   %s" % (expectedUll, expectedUllAsDouble)
            expectedFpSumAsLongLong = h2o_util.doubleToUnsignedLongLong(expectedFpSum)
            print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)
Exemplo n.º 2
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE):
    """Write a synthetic CSV of signed reals and return python's expected checksums.

    Every cell is ``2.0**rexp + 3.0*row`` with ``rexp`` drawn uniformly from
    0..20 and a randomly chosen sign, written with "%.16f" formatting.

    Args:
        csvPathname: path of the CSV file to create (opened with mode 'w').
        rowCount: number of rows to write.
        colCount: number of comma-separated values per row.
        expectedMin, expectedMax: accepted for interface compatibility; they
            do not influence the generated values.
        SEEDPERFILE: accepted for interface compatibility; the random module
            is not re-seeded here.

    Returns:
        (expectedUllSum, expectedFpSum): the XOR of
        h2o_util.doubleToUnsignedLongLong(value) over every value with the
        top 4 bits cleared, and the plain floating point sum of the values.
        Both are zero when no cell is written.
    """
    expectedFpSum = float(0)
    expectedUllSum = int(0)

    # 'with' closes the file even if a conversion below raises part-way through
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):
                # The original generator made these two draws and then
                # overwrote/ignored the results. They are still made so the
                # sequence of random numbers consumed per cell is unchanged.
                random.random()
                random.randint(0, 1)

                # Earlier experiments noted large offsets such as
                # 5555555555 + row as failing; this pattern is the one in use.
                rexp = random.randint(0, 20)
                value = 2.0**rexp + 3.0*row
                if random.randint(0, 1) == 0:
                    value = -1 * value

                # get the expected patterns from python
                fpResult = float(value)
                expectedUllSum ^= h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult

                # Fixed-point text with 16 decimals. Trailing zeros are left
                # in place on purpose (the original author noted that
                # stripping them led to NAs when an exponent was affected).
                rowData.append("%.16f" % value)

            dsf.write(",".join(rowData) + "\n")

    # zero 4 bits of sign/exponent like h2o does, to prevent inf/nan
    expectedUllSum = expectedUllSum & ~(0xf << 60)
    return (expectedUllSum, expectedFpSum)
Exemplo n.º 3
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin,
                      expectedMax, SEEDPERFILE):
    """Create a CSV of random signed reals; return (xor checksum, fp sum).

    Cell values are ``2.0**rexp + 3.0 * row`` where ``rexp`` is a random
    integer in 0..20, negated with probability one half. Each value is
    written as "%.16f" text, one row per line, comma separated.

    Args:
        csvPathname: file to create; an existing file is truncated.
        rowCount: rows to generate.
        colCount: values per row.
        expectedMin, expectedMax: kept in the signature for callers; the
            generated values do not depend on them.
        SEEDPERFILE: kept in the signature for callers; not used to seed
            the random module.

    Returns:
        Tuple ``(expectedUllSum, expectedFpSum)``. The first item is the
        running XOR of h2o_util.doubleToUnsignedLongLong() over all values,
        masked so that the upper 4 bits are zero; the second is the running
        float sum. An empty dataset yields ``(0, 0.0)``.
    """
    expectedFpSum = float(0)
    expectedUllSum = int(0)

    # The context manager releases the file handle on every exit path,
    # including an exception raised while generating a row.
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):
                # Draws that the original code made but never used. They stay
                # so that each cell consumes the same random numbers as before.
                random.random()
                random.randint(0, 1)

                rexp = random.randint(0, 20)
                value = 2.0**rexp + 3.0 * row

                negate = random.randint(0, 1) == 0
                if negate:
                    value = -1 * value

                # expected bit pattern and sum, as python sees the value
                fpResult = float(value)
                ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult
                expectedUllSum = expectedUllSum ^ ullResult

                # Full-precision fixed-point text; zeros are deliberately not
                # stripped (stripping was noted to produce NAs).
                rowData.append("%.16f" % value)

            dsf.write(",".join(rowData) + "\n")

    # zero 4 bits of sign/exponent like h2o does, to prevent inf/nan
    expectedUllSum = expectedUllSum & ~(0xf << 60)
    return (expectedUllSum, expectedFpSum)
Exemplo n.º 4
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE):
    """Write a synthetic CSV of values ``3 * 2**exp`` and return python's expectations.

    Each cell is ``3 * (2 ** exp)`` with ``exp`` drawn uniformly from 0..10,
    written with "%.16e" formatting.

    Args:
        csvPathname: path of the CSV file to create (opened with mode 'w').
        rowCount: number of rows to write.
        colCount: number of comma-separated values per row.
        expectedMin, expectedMax: accepted for interface compatibility; they
            do not influence the generated values.
        SEEDPERFILE: accepted for interface compatibility; the random module
            is not re-seeded here.

    Returns:
        (expectedUll, expectedFpSum): h2o_util.doubleToUnsignedLongLong() of
        the LAST value written (not an accumulation over all values), and
        the floating point sum of all values. When no cell is written the
        result is (0, 0.0); previously that case raised UnboundLocalError
        because expectedUll was only bound inside the loop.
    """
    expectedFpSum = 0.0
    # bound up front so an empty dataset (0 rows or 0 columns) still returns
    expectedUll = 0

    # 'with' closes the file even if a conversion below raises part-way through
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):
                # Result unused (the value is replaced just below); the draw
                # is kept so the random numbers consumed per cell are unchanged.
                random.random()

                exp = random.randint(0, 10)
                value = 3 * (2 ** exp)

                # Sign flipping is disabled, but the original still made this
                # draw; keep it for the same reason as above.
                random.randint(0, 1)

                # get the expected patterns from python
                fpResult = float(value)
                expectedUll = h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult

                # exponent notation with 16 decimals; trailing zeros are not
                # stripped
                rowData.append("%.16e" % value)

            dsf.write(",".join(rowData) + "\n")

    return (expectedUll, expectedFpSum)
Exemplo n.º 5
0
    def test_exec2_xorsum(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()

        tryList = [
            (ROWS, 1, 'r1', 0, 10, None),
        ]

        for trial in range(10):
            ullResultList = []
            for (rowCount, colCount, hex_key, expectedMin, expectedMax, expected) in tryList:
                SEEDPERFILE = random.randint(0, sys.maxint)
                # dynamic range of the data may be useful for estimating error
                maxDelta = expectedMax - expectedMin

                csvFilename = 'syn_real_' + str(rowCount) + 'x' + str(colCount) + '.csv'
                csvPathname = SYNDATASETS_DIR + '/' + csvFilename
                csvPathnameFull = h2i.find_folder_and_filename(None, csvPathname, returnFullPath=True)
                print "Creating random", csvPathname
                (expectedUllSum, expectedFpSum)  = write_syn_dataset(csvPathname, 
                    rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE)
                expectedUllSumAsDouble = h2o_util.unsignedLongLongToDouble(expectedUllSum)
                expectedFpSumAsLongLong = h2o_util.doubleToUnsignedLongLong(expectedFpSum)

                parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, 
                    timeoutSecs=3000, retryDelaySecs=2)
                numRows, numCols, parse_key = h2o_cmd.infoFromParse(parseResult)
                assert parse_key == hex_key
                assert numCols == colCount
                assert numRows == rowCount

                inspect = h2o_cmd.runInspect(key=hex_key)
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
                assert len(missingList) == 0

                # looking at the 8 bytes of bits for the h2o doubles
                # xorsum will zero out the sign and exponent
                for execExpr in exprList:
                    for r in range(10):
                        start = time.time()
                        execResult = h2o_cmd.runExec(ast=execExpr, timeoutSecs=30)
                        fpResult = execResult['scalar']
                        # (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0], execExpr, resultKey='h', timeoutSecs=300)
                        print r, 'exec took', time.time() - start, 'seconds'
                        print r, "execResult:", h2o.dump_json(execResult)
                        h2o_cmd.runStoreView()

                        ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                        ullResultList.append((ullResult, fpResult))

                        print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)
                        print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (expectedUllSum, expectedUllSumAsDouble)
                        print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)

                        # allow diff of the lsb..either way
                        # if ullResult!=expectedUllSum and abs((ullResult-expectedUllSum)>3):
                        if ullResult!=expectedUllSum:
                            raise Exception("h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % (ullResult, expectedUllSum))
                            print "h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % (ullResult, expectedUllSum)

                h2o.check_sandbox_for_errors()

                print "first result was from a sum. others are xorsum"
                print "ullResultList:"
                for ullResult, fpResult in ullResultList:
                    print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)

                print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (expectedUllSum, expectedUllSumAsDouble)
                print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)
Exemplo n.º 6
0
    def test_parse_multiprocess_fvec(self):
        h2o.beta_features = True
        # hdfs://<name node>/datasets/manyfiles-nflx-gz/file_1.dat.gz
        # don't raise exception if we find something bad in h2o stdout/stderr?
        # h2o.nodes[0].sandboxIgnoreErrors = True
        OUTSTANDING = min(10, len(h2o.nodes))

        if DO_IRIS:
            global DO_BIGFILE
            DO_BIGFILE = False
            bucket = 'smalldata'
            importFolderPath = "iris"
            csvFilename = "iris2.csv"
            csvFilePattern = "iris2.csv"
            if localhost:
                trialMax = 20
            else:
                trialMax = 100
        elif DO_BIGFILE:
            bucket = 'home-0xdiag-datasets'
            importFolderPath = "standard"
            csvFilename = "covtype20x.data"
            csvFilePattern = "covtype20x.data"
            trialMax = 2 * OUTSTANDING
        else:
            bucket = 'home-0xdiag-datasets'
            importFolderPath = "standard"
            csvFilename = "covtype.data"
            csvFilePattern = "covtype.data"
            trialMax = 40 * OUTSTANDING

        # add one just to make it odd
        # OUTSTANDING = min(10, len(h2o.nodes) + 1)
        # don't have more than one source file per node OUTSTANDING? (think of the node increment rule)
    
        # okay to reuse the src_key name. h2o deletes? use unique hex to make sure it's not reused.
        # might go to unique src keys also ..oops have to, to prevent complaints about the key (lock)
        # can't repeatedly import the folder

        # only if not noPoll. otherwise parse isn't done
        # I guess I have to use 'put' so I can name the src key unique, to get overlap
        # I could tell h2o to not delete, but it's nice to get the keys in a new place?
        # maybe rebalance? FIX! todo

        parseTrial = 0
        summaryTrial = 0
        uploader_resultq = multiprocessing.Queue()
        while parseTrial <= trialMax:
            start = time.time()
            uploaders = []
            if not DO_IRIS:
                assert OUTSTANDING<=10 , "we only have 10 links with unique names to covtype.data"
            for o in range(OUTSTANDING):
                src_key = csvFilename + "_" + str(parseTrial) 
                hex_key = csvFilename + "_" + str(parseTrial) + ".hexxx"
                # "key": "hdfs://192.168.1.176/datasets/manyfiles-nflx-gz/file_99.dat.gz", 

                # hacked hard ln so source keys would have different names? was getting h2o locking issues
                if DO_IRIS:
                    csvPathname = importFolderPath + "/" + csvFilePattern
                else:
                    csvPathname = importFolderPath + "/" + csvFilePattern + "_" + str(o)
                start = time.time()

                # walk the nodes
                # if this rule is matched for exec/summary below, it should find the name okay? (npe with xorsum)
                # summary2 not seeing it?
                np = parseTrial % len(h2o.nodes)
                retryDelaySecs=5 if DO_BIGFILE else 1
                timeoutSecs=60 if DO_BIGFILE else 15
                tmp = multiprocessing.Process(target=function_no_keyboard_intr,
                    args=(uploader_resultq, uploadit, np, bucket, csvPathname, src_key, hex_key, timeoutSecs, retryDelaySecs))
                tmp.start()
                uploaders.append(tmp)
                parseTrial += 1

            # now sync on them
            for uploader in uploaders:
                try:
                    uploader.join()
                    # don't need him any more
                    uploader.terminate()
                    (importPattern, hex_key) = uploader_resultq.get(timeout=10)
                except KeyboardInterrupt:
                    print 'parent received ctrl-c'
                    for uploader in uploaders:
                        uploader.terminate()
                        uploader.join()
            elapsed = time.time() - start
            print "Parse group end at #", parseTrial, "completed in", "%6.2f" % elapsed, "seconds.", \
                "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

        print "We might have parses that haven't completed. The join just says we can reuse some files (parse still going)"
        if PARSE_NOPOLL:
            h2o_jobs.pollWaitJobs(timeoutSecs=180)

        h2o_cmd.runStoreView()
        # h2o_jobs.pollStatsWhileBusy(timeoutSecs=300, pollTimeoutSecs=15, retryDelaySecs=0.25)

        if DO_PARSE_ALSO: # only if we parsed
            print "These all go to node [0]"
            # getting a NPE if I do xorsum (any exec?) ..just do summary for now..doesn't seem to have the issue
            # suspect it's about the multi-node stuff above
            for summaryTrial in range(trialMax):

                # do last to first..to get race condition?
                firstXorUll = None
                firstQuantileUll = None
                hex_key = csvFilename + "_" + str(summaryTrial) + ".hexxx"
                
                if DO_EXEC_QUANT:
                    execExpr = "r2=c(1); r2=quantile(%s[,1], c(%s));" % (hex_key, thresholds)
                    (resultExec, fpResult) = h2e.exec_expr(execExpr=execExpr, timeoutSecs=30)
                    ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                    print "%30s" % "median ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)
                    if firstQuantileUll:
                        self.assertEqual(ullResult, firstQuantileUll)
                    else:
                        firstQuantileUll = ullResult

                if DO_XORSUM:
                    execExpr = "r2=c(1); r2=xorsum(%s[,1], c(%s));" % (hex_key, thresholds)
                    (resultExec, fpResult) = h2e.exec_expr(execExpr=execExpr, timeoutSecs=30)
                    ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                    print "%30s" % "xorsum ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)

                    if firstXorUll:
                        self.assertEqual(ullResult, firstXorUll)
                    else:
                        firstXorUll = ullResult

                if DO_SUMMARY:
                    h2o_cmd.runSummary(key=hex_key)
Exemplo n.º 7
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE, sel):
    """Write a synthetic CSV in one text format and return python's expected checksums.

    Each cell is ``random.random() + 2**exp`` with ``exp`` drawn uniformly
    from 0..50. The value is turned into text by
    ``h2o_util.fp_format(value, sel=sel)``, and the expectations are computed
    from ``float()`` of that text rather than from the original value, so
    they reflect what a reader of the file sees.

    Args:
        csvPathname: path of the CSV file to create (opened with mode 'w').
        rowCount: number of rows to write.
        colCount: number of comma-separated values per row.
        expectedMin, expectedMax: accepted for interface compatibility; they
            do not influence the generated values.
        SEEDPERFILE: accepted for interface compatibility; the random module
            is not re-seeded here.
        sel: passed to h2o_util.fp_format as ``sel`` for every value, so the
            same format case is used throughout the file.

    Returns:
        (expectedUllSum, expectedFpSum): the XOR of
        h2o_util.doubleToUnsignedLongLong() over every re-read value with
        the top 4 bits cleared, and the floating point sum of those values.
        Both are zero when no cell is written.
    """
    expectedFpSum = float(0)
    expectedUllSum = int(0)

    # 'with' closes the file even if formatting or conversion raises part-way
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):
                # Result unused (the value is replaced just below); the draw
                # is kept so the random numbers consumed per cell are unchanged.
                random.random()

                # we want a big exponent?
                exp = random.randint(0, 50)
                value = random.random() + (2 ** exp)

                # Sign flipping is disabled, but the original still made this
                # draw; keep it for the same reason as above.
                random.randint(0, 1)

                # NOTE(review): the result of this no-argument call was never
                # used. It presumably just returns the number of format cases
                # and could be dropped -- confirm fp_format() has no side
                # effects first.
                h2o_util.fp_format()

                # use same case for all numbers
                s = h2o_util.fp_format(value, sel=sel)

                # The text may not round-trip to the exact same double, so the
                # expected patterns are taken from the formatted string.
                fpResult = float(s)
                expectedUllSum ^= h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult
                rowData.append(s)

            dsf.write(",".join(map(str, rowData)) + "\n")

    # zero the upper 4 bits of xorsum like h2o does to prevent inf/nan
    expectedUllSum &= (~(0xf << 60))
    return (expectedUllSum, expectedFpSum)
Exemplo n.º 8
0
    def test_exec2_xorsum2(self):
        h2o.beta_features = True
        SYNDATASETS_DIR = h2o.make_syn_dir()
        tryList = [
            (ROWS, 1, 'r1', 0, 10, None),
        ]

        for trial in range(20):
            ullResultList = []
            NUM_FORMAT_CASES = h2o_util.fp_format()
            for (rowCount, colCount, hex_key, expectedMin, expectedMax, expected) in tryList:
                SEEDPERFILE = random.randint(0, sys.maxint)
                # dynamic range of the data may be useful for estimating error
                maxDelta = expectedMax - expectedMin

                csvFilename = 'syn_real_' + str(rowCount) + 'x' + str(colCount) + '.csv'
                csvPathname = SYNDATASETS_DIR + '/' + csvFilename
                csvPathnameFull = h2i.find_folder_and_filename(None, csvPathname, returnFullPath=True)
                print "Creating random", csvPathname

                sel = random.randint(0, NUM_FORMAT_CASES-1)
                (expectedUllSum, expectedFpSum)  = write_syn_dataset(csvPathname, 
                    rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE, sel)
                expectedUllSumAsDouble = h2o_util.unsignedLongLongToDouble(expectedUllSum)
                expectedFpSumAsLongLong = h2o_util.doubleToUnsignedLongLong(expectedFpSum)

                parseResult = h2i.import_parse(path=csvPathname, schema='local', hex_key=hex_key, 
                    timeoutSecs=3000, retryDelaySecs=2)
                inspect = h2o_cmd.runInspect(key=hex_key)
                print "numRows:", inspect['numRows']
                print "numCols:", inspect['numCols']
                inspect = h2o_cmd.runInspect(key=hex_key, offset=-1)
                print "inspect offset = -1:", h2o.dump_json(inspect)

                
                # looking at the 8 bytes of bits for the h2o doubles
                # xorsum will zero out the sign and exponent
                for execExpr in exprList:
                    start = time.time()
                    (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0], execExpr, 
                        resultKey=None, timeoutSecs=300)
                    print 'exec took', time.time() - start, 'seconds'
                    print "execResult:", h2o.dump_json(execResult)
                    ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                    ullResultList.append((ullResult, fpResult))

                    print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)
                    print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (expectedUllSum, expectedUllSumAsDouble)
                    print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)

                    # allow diff of the lsb..either way. needed when integers are parsed

                    # okay for a couple of lsbs to be wrong, due to conversion from stringk
                    # ullResult (0.16x): 0x02c1a21f923cee96   2.15698793923e-295
                    # expectedUllSum (0.16x): 0x02c1a21f923cee97   2.15698793923e-295
                    # expectedFpSum (0.16x): 0x42f054af32b3c408   2.87294442126e+14

                    # ullResult and expectedUllSum are Q ints, (64-bit) so can subtract them.
                    # I guess we don't even care about sign, since we zero the first 4 bits (xorsum) to avoid nan/inf issues
                    ALLOWED_BIT_ERR = 0x1f # seeing this amount of error!
                    if ullResult!=expectedUllSum and (abs(ullResult-expectedUllSum)>ALLOWED_BIT_ERR):
                        raise Exception("h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % (ullResult, expectedUllSum))
                        print "h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % (ullResult, expectedUllSum)

                    # print "%30s" % "hex(bitResult):", hex(ullResult)

                h2o.check_sandbox_for_errors()

                print "first result was from a sum. others are xorsum"
                print "ullResultList:"
                for ullResult, fpResult in ullResultList:
                    print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)

                print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (expectedUllSum, expectedUllSumAsDouble)
                print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)
Exemplo n.º 9
0
    def test_exec2_xorsum(self):
        h2o.beta_features = True
        SYNDATASETS_DIR = h2o.make_syn_dir()

        tryList = [
            (ROWS, 1, 'r1', 0, 10, None),
        ]

        for trial in range(10):
            ullResultList = []
            for (rowCount, colCount, hex_key, expectedMin, expectedMax,
                 expected) in tryList:
                SEEDPERFILE = random.randint(0, sys.maxint)
                # dynamic range of the data may be useful for estimating error
                maxDelta = expectedMax - expectedMin

                csvFilename = 'syn_real_' + str(rowCount) + 'x' + str(
                    colCount) + '.csv'
                csvPathname = SYNDATASETS_DIR + '/' + csvFilename
                csvPathnameFull = h2i.find_folder_and_filename(
                    None, csvPathname, returnFullPath=True)
                print "Creating random", csvPathname
                (expectedUllSum,
                 expectedFpSum) = write_syn_dataset(csvPathname, rowCount,
                                                    colCount, expectedMin,
                                                    expectedMax, SEEDPERFILE)
                expectedUllSumAsDouble = h2o_util.unsignedLongLongToDouble(
                    expectedUllSum)
                expectedFpSumAsLongLong = h2o_util.doubleToUnsignedLongLong(
                    expectedFpSum)

                parseResult = h2i.import_parse(path=csvPathname,
                                               schema='put',
                                               hex_key=hex_key,
                                               timeoutSecs=3000,
                                               retryDelaySecs=2)
                inspect = h2o_cmd.runInspect(key=hex_key)
                print "numRows:", inspect['numRows']
                print "numCols:", inspect['numCols']
                inspect = h2o_cmd.runInspect(key=hex_key, offset=-1)
                print "inspect offset = -1:", h2o.dump_json(inspect)

                # looking at the 8 bytes of bits for the h2o doubles
                # xorsum will zero out the sign and exponent
                for execExpr in exprList:
                    for r in range(10):
                        start = time.time()
                        (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0],
                                                               execExpr,
                                                               resultKey='h',
                                                               timeoutSecs=300)
                        print r, 'exec took', time.time() - start, 'seconds'
                        print r, "execResult:", h2o.dump_json(execResult)
                        h2o_cmd.runStoreView()
                        ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                        ullResultList.append((ullResult, fpResult))

                        print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (
                            ullResult, fpResult)
                        print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (
                            expectedUllSum, expectedUllSumAsDouble)
                        print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (
                            expectedFpSumAsLongLong, expectedFpSum)

                        # allow diff of the lsb..either way
                        # if ullResult!=expectedUllSum and abs((ullResult-expectedUllSum)>3):
                        if ullResult != expectedUllSum:
                            raise Exception(
                                "h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x"
                                % (ullResult, expectedUllSum))
                            print "h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % (
                                ullResult, expectedUllSum)

                h2o.check_sandbox_for_errors()

                print "first result was from a sum. others are xorsum"
                print "ullResultList:"
                for ullResult, fpResult in ullResultList:
                    print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (
                        ullResult, fpResult)

                print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (
                    expectedUllSum, expectedUllSumAsDouble)
                print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (
                    expectedFpSumAsLongLong, expectedFpSum)
Exemplo n.º 10
0
    def test_parse_multiprocess_fvec(self):
        h2o.beta_features = True
        # hdfs://<name node>/datasets/manyfiles-nflx-gz/file_1.dat.gz
        # don't raise exception if we find something bad in h2o stdout/stderr?
        # h2o.nodes[0].sandboxIgnoreErrors = True
        OUTSTANDING = min(10, len(h2o.nodes))

        if DO_IRIS:
            global DO_BIGFILE
            DO_BIGFILE = False
            bucket = 'smalldata'
            importFolderPath = "iris"
            csvFilename = "iris2.csv"
            csvFilePattern = "iris2.csv"
            if localhost:
                trialMax = 20
            else:
                trialMax = 100
        elif DO_BIGFILE:
            bucket = 'home-0xdiag-datasets'
            importFolderPath = "standard"
            csvFilename = "covtype20x.data"
            csvFilePattern = "covtype20x.data"
            trialMax = 2 * OUTSTANDING
        else:
            bucket = 'home-0xdiag-datasets'
            importFolderPath = "standard"
            csvFilename = "covtype.data"
            csvFilePattern = "covtype.data"
            trialMax = 40 * OUTSTANDING

        # add one just to make it odd
        # OUTSTANDING = min(10, len(h2o.nodes) + 1)
        # don't have more than one source file per node OUTSTANDING? (think of the node increment rule)
    
        # okay to reuse the src_key name. h2o deletes? use unique hex to make sure it's not reused.
        # might go to unique src keys also ..oops have to, to prevent complaints about the key (lock)
        # can't repeatedly import the folder

        # only if not noPoll. otherwise parse isn't done
        # I guess I have to use 'put' so I can name the src key unique, to get overlap
        # I could tell h2o to not delete, but it's nice to get the keys in a new place?
        # maybe rebalance? FIX! todo

        parseTrial = 0
        summaryTrial = 0
        uploader_resultq = multiprocessing.Queue()
        while parseTrial <= trialMax:
            start = time.time()
            uploaders = []
            if not DO_IRIS:
                assert OUTSTANDING<=10 , "we only have 10 links with unique names to covtype.data"
            for o in range(OUTSTANDING):
                src_key = csvFilename + "_" + str(parseTrial) 
                hex_key = csvFilename + "_" + str(parseTrial) + ".hexxx"
                # "key": "hdfs://172.16.2.176/datasets/manyfiles-nflx-gz/file_99.dat.gz", 

                # hacked hard ln so source keys would have different names? was getting h2o locking issues
                if DO_IRIS:
                    csvPathname = importFolderPath + "/" + csvFilePattern
                else:
                    csvPathname = importFolderPath + "/" + csvFilePattern + "_" + str(o)
                start = time.time()

                # walk the nodes
                # if this rule is matched for exec/summary below, it should find the name okay? (npe with xorsum)
                # summary2 not seeing it?
                np = parseTrial % len(h2o.nodes)
                retryDelaySecs=5 if DO_BIGFILE else 1
                timeoutSecs=60 if DO_BIGFILE else 15
                tmp = multiprocessing.Process(target=function_no_keyboard_intr,
                    args=(uploader_resultq, uploadit, np, bucket, csvPathname, src_key, hex_key, timeoutSecs, retryDelaySecs))
                tmp.start()
                uploaders.append(tmp)
                parseTrial += 1

            # now sync on them
            for uploader in uploaders:
                try:
                    uploader.join()
                    # don't need him any more
                    uploader.terminate()
                    (importPattern, hex_key) = uploader_resultq.get(timeout=10)
                except KeyboardInterrupt:
                    print 'parent received ctrl-c'
                    for uploader in uploaders:
                        uploader.terminate()
                        uploader.join()
            elapsed = time.time() - start
            print "Parse group end at #", parseTrial, "completed in", "%6.2f" % elapsed, "seconds.", \
                "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

        print "We might have parses that haven't completed. The join just says we can reuse some files (parse still going)"
        if PARSE_NOPOLL:
            h2o_jobs.pollWaitJobs(timeoutSecs=180)

        h2o_cmd.runStoreView()
        # h2o_jobs.pollStatsWhileBusy(timeoutSecs=300, pollTimeoutSecs=15, retryDelaySecs=0.25)

        if DO_PARSE_ALSO: # only if we parsed
            print "These all go to node [0]"
            # getting a NPE if I do xorsum (any exec?) ..just do summary for now..doesn't seem to have the issue
            # suspect it's about the multi-node stuff above
            for summaryTrial in range(trialMax):

                # do last to first..to get race condition?
                firstXorUll = None
                firstQuantileUll = None
                hex_key = csvFilename + "_" + str(summaryTrial) + ".hexxx"
                
                if DO_EXEC_QUANT:
                    execExpr = "r2=c(1); r2=quantile(%s[,1], c(%s));" % (hex_key, thresholds)
                    (resultExec, fpResult) = h2e.exec_expr(execExpr=execExpr, timeoutSecs=30)
                    ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                    print "%30s" % "median ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)
                    if firstQuantileUll:
                        self.assertEqual(ullResult, firstQuantileUll)
                    else:
                        firstQuantileUll = ullResult

                if DO_XORSUM:
                    execExpr = "r2=c(1); r2=xorsum(%s[,1], c(%s));" % (hex_key, thresholds)
                    (resultExec, fpResult) = h2e.exec_expr(execExpr=execExpr, timeoutSecs=30)
                    ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                    print "%30s" % "xorsum ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)

                    if firstXorUll:
                        self.assertEqual(ullResult, firstXorUll)
                    else:
                        firstXorUll = ullResult

                if DO_SUMMARY:
                    h2o_cmd.runSummary(key=hex_key)
Exemplo n.º 11
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE, sel):
    """Write a one-column synthetic CSV and return the xorsum and sum python expects for it.

    The kind of value generated depends on `row % CHUNKING_CNT`, so different
    rows hold differently shaped data. Each value is turned into a string by
    h2o_util.fp_format (restricted to 'e' formats), and the expected results
    are computed from float() of that exact string, i.e. from what was
    written to the file rather than from the value before formatting.

    Args:
        csvPathname: path of the file to create (opened with mode 'w').
        rowCount: number of rows to write.
        colCount: must be 1.
        expectedMin, expectedMax: bounds of the uniform values generated when
            row % CHUNKING_CNT == 1.
        SEEDPERFILE: accepted but not used in this function; values come from
            the module-level `random` generator as-is.
        sel: fp format selector used for every value when RANDOM_E_FP_FORMATS
            is off. None skips the range check.

    Returns:
        (expectedUllSum, expectedFpSum): the XOR of
        h2o_util.doubleToUnsignedLongLong() over every written value, with the
        upper 4 bits cleared, and the float sum of the written values.

    Raises:
        Exception: if colCount != 1, or if sel is outside [0, NUM_CASES).
    """
    # this only does the sum stuff for single cols right now
    if colCount != 1:
        raise Exception("only support colCount == 1 here right now %s" % (colCount,))

    NUM_CASES = h2o_util.fp_format()
    if sel is not None and (sel < 0 or sel >= NUM_CASES):
        raise Exception("sel used to select from possible fp formats is out of range: %s %s" % (sel, NUM_CASES))

    expectedRange = (expectedMax - expectedMin)
    expectedFpSum = float(0)
    expectedUllSum = int(0)
    # "with" closes the file even if formatting or float() raises part way through
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):

                # Be Nasty!. We know fp compression varies per chunk
                # so...adjust the random fp data, depending on what rows your are at
                # i.e. cluster results per chunk, smaller variance within chunk, larger variance outside of chunk

                # Actually: generate "different" data depending on where you are in the rows
                method = row % CHUNKING_CNT

                if method == 1:
                    value = expectedMin + (random.random() * expectedRange)
                elif method == 2:
                    # integer bound (was the float 1e6); same range of results
                    value = random.randint(1, 1000000)
                elif method == 3:
                    value = 5555555555555 + row
                else: # method == 0 and > 3
                    # we want a big exponent? (was randint(40,71))
                    exp = random.randint(0, 120)
                    # skip over the current bug around int boundaries?
                    # have a fixed base
                    value = random.random() + (2 ** exp)

                r = random.randint(0, 4)
                # 20% negative
                if DO_NEGATIVE and r == 0:
                    value = -1 * value

                # use a random fp format (string). use sel to force one you like
                # only keeps it to formats with "e"
                if RANDOM_E_FP_FORMATS:
                    s = h2o_util.fp_format(value, sel=None, only='e') # this would be random, within 'e' only
                else:
                    s = h2o_util.fp_format(value, sel=sel, only='e') # use same format for all numbers

                # FIX! strip the trailing zeroes for now because they trigger a bug
                # old bug was: can't rstrip if .16e is used because trailing +00 becomes +, causes NA
                if not DO_BUG:
                    s = s.rstrip("0")

                # now our string formatting will lead to different values when we parse and use it
                # so the expected value generation happens here..i.e after we've formatted the string
                # we suck it back in as a fp number to get the expected patterns from python
                fpResult = float(s)
                expectedUllSum ^= h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult
                rowData.append(s)

            # one line per row, written once all of the row's columns are collected
            dsf.write(",".join(map(str, rowData)) + "\n")

    # zero the upper 4 bits of xorsum like h2o does to prevent inf/nan
    # print hex(~(0xf << 60))
    expectedUllSum &= (~(0xf << 60))
    return (expectedUllSum, expectedFpSum)
Exemplo n.º 12
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin,
                      expectedMax, SEEDPERFILE, sel):
    """Create a single-column synthetic CSV; return python's expected xorsum and sum.

    Which generator a row uses is picked by `row % CHUNKING_CNT`. Values are
    converted to text with h2o_util.fp_format (only 'e' formats), and the
    expected results are accumulated from float() of that text, so they
    reflect exactly what ends up in the file.

    Args:
        csvPathname: path of the file to create (opened with mode 'w').
        rowCount: number of rows to write.
        colCount: must be 1.
        expectedMin, expectedMax: bounds of the uniform values generated when
            row % CHUNKING_CNT == 1.
        SEEDPERFILE: accepted but not used in this function; values come from
            the module-level `random` generator as-is.
        sel: fp format selector used for every value when RANDOM_E_FP_FORMATS
            is off. None skips the range check.

    Returns:
        (expectedUllSum, expectedFpSum): the XOR of
        h2o_util.doubleToUnsignedLongLong() over every written value, with the
        upper 4 bits cleared, and the float sum of the written values.

    Raises:
        Exception: if colCount != 1, or if sel is outside [0, NUM_CASES).
    """
    # this only does the sum stuff for single cols right now
    if colCount != 1:
        raise Exception(
            "only support colCount == 1 here right now %s" % (colCount, ))

    NUM_CASES = h2o_util.fp_format()
    if sel is not None and (sel < 0 or sel >= NUM_CASES):
        raise Exception(
            "sel used to select from possible fp formats is out of range: %s %s"
            % (sel, NUM_CASES))

    expectedRange = (expectedMax - expectedMin)
    expectedFpSum = float(0)
    expectedUllSum = int(0)
    # the context manager closes the file even when a row fails to format
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):

                # Be Nasty!. We know fp compression varies per chunk
                # so...adjust the random fp data, depending on what rows your are at
                # i.e. cluster results per chunk, smaller variance within chunk, larger variance outside of chunk

                # Actually: generate "different" data depending on where you are in the rows
                method = row % CHUNKING_CNT

                if method == 1:
                    value = expectedMin + (random.random() * expectedRange)
                elif method == 2:
                    # integer bound (was the float 1e6); same range of results
                    value = random.randint(1, 1000000)
                elif method == 3:
                    value = 5555555555555 + row
                else:  # method == 0 and > 3
                    # we want a big exponent? (was randint(40,71))
                    exp = random.randint(0, 120)
                    # skip over the current bug around int boundaries?
                    # have a fixed base
                    value = random.random() + (2**exp)

                r = random.randint(0, 4)
                # 20% negative
                if DO_NEGATIVE and r == 0:
                    value = -1 * value

                # use a random fp format (string). use sel to force one you like
                # only keeps it to formats with "e"
                if RANDOM_E_FP_FORMATS:
                    # random, within 'e' only
                    s = h2o_util.fp_format(value, sel=None, only='e')
                else:
                    # use same format for all numbers
                    s = h2o_util.fp_format(value, sel=sel, only='e')

                # FIX! strip the trailing zeroes for now because they trigger a bug
                # old bug was: can't rstrip if .16e is used because trailing +00 becomes +, causes NA
                if not DO_BUG:
                    s = s.rstrip("0")

                # now our string formatting will lead to different values when we parse and use it
                # so the expected value generation happens here..i.e after we've formatted the string
                # we suck it back in as a fp number to get the expected patterns from python
                fpResult = float(s)
                expectedUllSum ^= h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult
                rowData.append(s)

            # one line per row, written once all of the row's columns are collected
            dsf.write(",".join(map(str, rowData)) + "\n")

    # zero the upper 4 bits of xorsum like h2o does to prevent inf/nan
    # print hex(~(0xf << 60))
    expectedUllSum &= (~(0xf << 60))
    return (expectedUllSum, expectedFpSum)
Exemplo n.º 13
0
    def test_exec2_xorsum2(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()
        tryList = [
            (ROWS, 1, 'r1', 0, 10, None),
        ]

        for trial in range(3):
            ullResultList = []
            NUM_FORMAT_CASES = h2o_util.fp_format()
            for (rowCount, colCount, hex_key, expectedMin, expectedMax,
                 expected) in tryList:
                SEEDPERFILE = random.randint(0, sys.maxint)
                # dynamic range of the data may be useful for estimating error
                maxDelta = expectedMax - expectedMin

                csvFilename = 'syn_real_' + str(rowCount) + 'x' + str(
                    colCount) + '.csv'
                csvPathname = SYNDATASETS_DIR + '/' + csvFilename
                csvPathnameFull = h2i.find_folder_and_filename(
                    None, csvPathname, returnFullPath=True)
                print "Creating random", csvPathname

                sel = random.randint(0, NUM_FORMAT_CASES - 1)
                (expectedUllSum, expectedFpSum) = write_syn_dataset(
                    csvPathname, rowCount, colCount, expectedMin, expectedMax,
                    SEEDPERFILE, sel)
                expectedUllSumAsDouble = h2o_util.unsignedLongLongToDouble(
                    expectedUllSum)
                expectedFpSumAsLongLong = h2o_util.doubleToUnsignedLongLong(
                    expectedFpSum)

                parseResult = h2i.import_parse(path=csvPathname,
                                               schema='put',
                                               hex_key=hex_key,
                                               timeoutSecs=3000,
                                               retryDelaySecs=2)
                inspect = h2o_cmd.runInspect(key=hex_key)
                print "numRows:", inspect['numRows']
                print "numCols:", inspect['numCols']
                inspect = h2o_cmd.runInspect(key=hex_key, offset=-1)
                print "inspect offset = -1:", h2o.dump_json(inspect)

                # looking at the 8 bytes of bits for the h2o doubles
                # xorsum will zero out the sign and exponent
                for execExpr in exprList:
                    for repeate in range(3):
                        start = time.time()
                        (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0],
                                                               execExpr,
                                                               resultKey=None,
                                                               timeoutSecs=300)
                        print 'exec took', time.time() - start, 'seconds'
                        print "execResult:", h2o.dump_json(execResult)
                        ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                        ullResultList.append((ullResult, fpResult))

                        print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (
                            ullResult, fpResult)
                        print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (
                            expectedUllSum, expectedUllSumAsDouble)
                        print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (
                            expectedFpSumAsLongLong, expectedFpSum)

                        # allow diff of the lsb..either way. needed when integers are parsed

                        # okay for a couple of lsbs to be wrong, due to conversion from stringk
                        # ullResult (0.16x): 0x02c1a21f923cee96   2.15698793923e-295
                        # expectedUllSum (0.16x): 0x02c1a21f923cee97   2.15698793923e-295
                        # expectedFpSum (0.16x): 0x42f054af32b3c408   2.87294442126e+14

                        # ullResult and expectedUllSum are Q ints, (64-bit) so can subtract them.
                        # I guess we don't even care about sign, since we zero the first 4 bits (xorsum) to avoid nan/inf issues
                        if ullResult != expectedUllSum and (
                                abs(ullResult - expectedUllSum) >
                                ALLOWED_DELTA):
                            emsg = "h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % (
                                ullResult, expectedUllSum)
                            if STOP_ON_ERROR:
                                raise Exception(emsg)
                            else:
                                print emsg

                        # print "%30s" % "hex(bitResult):", hex(ullResult)

                    h2o.check_sandbox_for_errors()

                    print "first result was from a sum. others are xorsum"
                    print "ullResultList:"
                    for ullResult, fpResult in ullResultList:
                        print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (
                            ullResult, fpResult)

                    print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (
                        expectedUllSum, expectedUllSumAsDouble)
                    print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (
                        expectedFpSumAsLongLong, expectedFpSum)
Exemplo n.º 14
0
    def test_exec2_xorsum(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()

        tryList = [
            (ROWS, 1, 'r1', 0, 10, None),
        ]

        for trial in range(10):
            ullResultList = []
            for (rowCount, colCount, hex_key, expectedMin, expectedMax, expected) in tryList:
                SEEDPERFILE = random.randint(0, sys.maxint)
                # dynamic range of the data may be useful for estimating error
                maxDelta = expectedMax - expectedMin

                csvFilename = 'syn_real_' + str(rowCount) + 'x' + str(colCount) + '.csv'
                csvPathname = SYNDATASETS_DIR + '/' + csvFilename
                csvPathnameFull = h2i.find_folder_and_filename(None, csvPathname, returnFullPath=True)
                print "Creating random", csvPathname
                (expectedUllSum, expectedFpSum)  = write_syn_dataset(csvPathname, 
                    rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE)
                expectedUllSumAsDouble = h2o_util.unsignedLongLongToDouble(expectedUllSum)
                expectedFpSumAsLongLong = h2o_util.doubleToUnsignedLongLong(expectedFpSum)

                parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, 
                    timeoutSecs=3000, retryDelaySecs=2)
                numRows, numCols, parse_key = h2o_cmd.infoFromParse(parseResult)
                assert parse_key == hex_key
                assert numCols == colCount
                assert numRows == rowCount

                inspect = h2o_cmd.runInspect(key=hex_key)
                missingList, labelList, numRows, numCols = h2o_cmd.infoFromInspect(inspect)
                assert len(missingList) == 0

                # looking at the 8 bytes of bits for the h2o doubles
                # xorsum will zero out the sign and exponent
                for execExpr in exprList:
                    for r in range(10):
        
                        if 1==0:
                            execResult = h2o_cmd.runExec(ast=execExpr, timeoutSecs=30)
                            fpResult = execResult['scalar']
                        else:
                            (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0], execExpr, resultKey='x', timeoutSecs=300)
                            # print dump_json(h2o.n0.frames(key="h"))

                        # (execResult, fpResult) = h2e.exec_expr(h2o.nodes[0], execExpr, resultKey='h', timeoutSecs=300)
                        # print dump_json(h2o.n0.frames(key="r1"))
                        print r, "execResult:", h2o.dump_json(execResult)
                        h2o_cmd.runStoreView()

                        ullResult = h2o_util.doubleToUnsignedLongLong(fpResult)
                        ullResultList.append((ullResult, fpResult))

                        print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)
                        print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (expectedUllSum, expectedUllSumAsDouble)
                        print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)

                        # allow diff of the lsb..either way
                        # if ullResult!=expectedUllSum and abs((ullResult-expectedUllSum)>3):
                        if ullResult!=expectedUllSum:
                            raise Exception("h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % \
                                (ullResult, expectedUllSum))
                            print "h2o didn't get the same xorsum as python. 0x%0.16x 0x%0.16x" % \
                                (ullResult, expectedUllSum)

                h2o.check_sandbox_for_errors()

                print "first result was from a sum. others are xorsum"
                print "ullResultList:"
                for ullResult, fpResult in ullResultList:
                    print "%30s" % "ullResult (0.16x):", "0x%0.16x   %s" % (ullResult, fpResult)

                print "%30s" % "expectedUllSum (0.16x):", "0x%0.16x   %s" % (expectedUllSum, expectedUllSumAsDouble)
                print "%30s" % "expectedFpSum (0.16x):", "0x%0.16x   %s" % (expectedFpSumAsLongLong, expectedFpSum)
Exemplo n.º 15
0
def write_syn_dataset(csvPathname, rowCount, colCount, expectedMin, expectedMax, SEEDPERFILE, sel):
    """Write a synthetic CSV of large-magnitude reals; return python's expected xorsum and sum.

    Every value is a random fraction added to 2**exp, with exp drawn from
    40..71. The value is converted to text with
    h2o_util.fp_format(value, sel=sel, only='e'), trailing "0" characters are
    stripped, and the expected results are accumulated from float() of that
    text, so they reflect exactly what ends up in the file.

    Args:
        csvPathname: path of the file to create (opened with mode 'w').
        rowCount: number of rows to write.
        colCount: number of values per row.
        expectedMin, expectedMax: accepted for interface compatibility. The
            uniform value they used to bound was always overwritten, so they
            no longer influence the output.
        SEEDPERFILE: accepted but not used in this function; values come from
            the module-level `random` generator as-is.
        sel: fp format selector passed to h2o_util.fp_format for every value.

    Returns:
        (expectedUllSum, expectedFpSum): the XOR of
        h2o_util.doubleToUnsignedLongLong() over every written value, with the
        upper 4 bits cleared, and the float sum of the written values.
    """
    expectedFpSum = float(0)
    expectedUllSum = int(0)
    # "with" closes the file even if formatting or float() raises part way through
    with open(csvPathname, 'w') as dsf:
        for row in range(rowCount):
            rowData = []
            for j in range(colCount):
                # The original drew a uniform value here and a coin flip further
                # down, then discarded both. The two draws are kept so the sequence
                # of numbers pulled from `random` is the same as before.
                random.random()

                # constrain the dynamic range of the numbers to be within IEEE-754 support
                # without loss of precision when adding. Why do we care though?
                # could h2o compress if values are outside that kind of dynamic range ?

                # we want a big exponent?
                exp = random.randint(40, 71)
                # skip over the current bug around int boundaries?
                # have a fixed base
                value = random.random() + (2 ** exp)

                # sign draw; negation is currently disabled so the result is unused
                random.randint(0, 1)

                # use the same fp format (string) for all numbers; sel picks it
                s = h2o_util.fp_format(value, sel=sel, only='e')
                # FIX! strip the trailing zeroes for now because they trigger a bug
                s = s.rstrip("0")
                # now our string formatting will lead to different values when we parse and use it
                # so the expected value generation happens here..i.e after we've formatted the string
                # we suck it back in as a fp number to get the expected patterns from python
                fpResult = float(s)
                expectedUllSum ^= h2o_util.doubleToUnsignedLongLong(fpResult)
                expectedFpSum += fpResult
                rowData.append(s)

            dsf.write(",".join(map(str, rowData)) + "\n")

    # zero the upper 4 bits of xorsum like h2o does to prevent inf/nan
    # print hex(~(0xf << 60))
    expectedUllSum &= (~(0xf << 60))
    return (expectedUllSum, expectedFpSum)