Exemplo n.º 1
0
    def test_GLM2_score_same(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        bucket = 'home-0xdiag-datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'standard' + '/' + csvFilename

        y = "10"
        kwargs = {'response':  y, 'alpha': 0, 'family': 'gaussian'}
        (modelKey, validation1, parseResult) = glm_doit(self, csvFilename, bucket, csvPathname, 
            timeoutSecs=60, pollTimeoutSecs=60, **kwargs) 
        print "Use", modelKey, "model on 2x and 4x replications and compare results to 1x"

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x

        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        modelPathname = SYNDATASETS_DIR + '/model_' + filename2x
        bucket = None
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_score(self,filename2x, bucket, pathname2x, modelKey, modelPathname, timeoutSecs=60, pollTimeoutSecs=60)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        modelPathname = SYNDATASETS_DIR + '/model_' + filename4x
        h2o_util.file_cat(pathname2x, pathname2x, pathname4x)
        
        print "Iterating 3 times on this last one"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_score(self,filename4x, bucket, pathname4x, modelKey, modelPathname, timeoutSecs=60, pollTimeoutSecs=60)
Exemplo n.º 2
0
    def test_1mx10_hastie_10_2_cat_and_shuffle(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)

        # This test also adds file shuffling, to see that row order doesn't matter
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)
        kmeans_doit(self, csvFilename, csvPathname, num_rows=1000000, timeoutSecs=60)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)
        
        filename1xShuf = "hastie_1x.data_shuf"
        pathname1xShuf = SYNDATASETS_DIR + '/' + filename1xShuf
        h2o_util.file_shuffle(pathname1x, pathname1xShuf)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1xShuf, pathname1xShuf, pathname2x)

        filename2xShuf = "hastie_2x.data_shuf"
        pathname2xShuf = SYNDATASETS_DIR + '/' + filename2xShuf
        h2o_util.file_shuffle(pathname2x, pathname2xShuf)
        kmeans_doit(self, filename2xShuf, pathname2xShuf, num_rows=2000000, timeoutSecs=90)

        # too big to shuffle?
        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2xShuf, pathname2xShuf, pathname4x)
        kmeans_doit(self, filename4x, pathname4x, num_rows=4000000, timeoutSecs=120)
    def test_KMeans_hastie_shuffle_fvec(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)

        # This test also adds file shuffling, to see that row order doesn't matter
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'standard/' + csvFilename
        bucket = 'home-0xdiag-datasets'
        kmeans_doit(self, csvFilename, bucket, csvPathname, numRows=1000000, timeoutSecs=60)
        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)
        
        filename1xShuf = "hastie_1x.data_shuf"
        pathname1xShuf = SYNDATASETS_DIR + '/' + filename1xShuf
        h2o_util.file_shuffle(pathname1x, pathname1xShuf)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1xShuf, pathname1xShuf, pathname2x)

        filename2xShuf = "hastie_2x.data_shuf"
        pathname2xShuf = SYNDATASETS_DIR + '/' + filename2xShuf
        h2o_util.file_shuffle(pathname2x, pathname2xShuf)
        kmeans_doit(self, filename2xShuf, None, pathname2xShuf, numRows=2000000, timeoutSecs=90)

        # too big to shuffle?
        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2xShuf, pathname2xShuf, pathname4x)
        kmeans_doit(self, filename4x, None, pathname4x, numRows=4000000, timeoutSecs=120)
Exemplo n.º 4
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)

        y = "10"
        x = ""
        kwargs = {'x': x, 'y':  y, 'case': -1, 'thresholds': 0.5}
        (modelKey, validations1) = glm_doit(self, csvFilename, csvPathname, 
            timeoutSecs=60, pollTimeoutSecs=60, **kwargs)

        print "Use", modelKey, "model on 2x and 4x replications and compare results to 1x"

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_score(self,filename2x, pathname2x, modelKey, thresholds="0.5",
            timeoutSecs=60, pollTimeoutSecs=60)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2x,pathname2x,pathname4x)
        
        print "Iterating 3 times on this last one"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_score(self,filename4x, pathname4x, modelKey, thresholds="0.5",
                timeoutSecs=60, pollTimeoutSecs=60)
Exemplo n.º 5
0
    def test_GLM_hastie(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        bucket = 'home-0xdiag-datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'standard' + '/' + csvFilename
        glm_doit(self, csvFilename, bucket, csvPathname, timeoutSecs=75)
        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_doit(self,filename2x, None, pathname2x, timeoutSecs=75)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2x,pathname2x,pathname4x)
        
        print "Iterating 3 times on this last one for perf compare"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_doit(self, filename4x, None, pathname4x, timeoutSecs=150)
Exemplo n.º 6
0
    def test_1mx10_hastie_10_2_cat_and_shuffle(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)

        # This test also adds file shuffling, to see that row order doesn't matter
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)
        glm_doit(self, csvFilename, csvPathname, timeoutSecs=30)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)

        filename1xShuf = "hastie_1x.data_shuf"
        pathname1xShuf = SYNDATASETS_DIR + '/' + filename1xShuf
        h2o_util.file_shuffle(pathname1x, pathname1xShuf)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1xShuf, pathname1xShuf, pathname2x)

        filename2xShuf = "hastie_2x.data_shuf"
        pathname2xShuf = SYNDATASETS_DIR + '/' + filename2xShuf
        h2o_util.file_shuffle(pathname2x, pathname2xShuf)
        glm_doit(self, filename2xShuf, pathname2xShuf, timeoutSecs=45)

        # too big to shuffle?
        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2xShuf, pathname2xShuf, pathname4x)
        glm_doit(self, filename4x, pathname4x, timeoutSecs=120)
    def test_GLM_hastie_shuffle(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)

        # This test also adds file shuffling, to see that row order doesn't matter
        csvFilename = "1mx10_hastie_10_2.data.gz"
        bucket = 'home-0xdiag-datasets'
        csvPathname = 'standard' + '/' + csvFilename
        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)

        glm_doit(self, csvFilename, bucket, csvPathname, timeoutSecs=30)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename1xShuf = "hastie_1x.data_shuf"
        pathname1xShuf = SYNDATASETS_DIR + '/' + filename1xShuf
        h2o_util.file_shuffle(pathname1x, pathname1xShuf)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1xShuf, pathname1xShuf, pathname2x)

        filename2xShuf = "hastie_2x.data_shuf"
        pathname2xShuf = SYNDATASETS_DIR + '/' + filename2xShuf
        h2o_util.file_shuffle(pathname2x, pathname2xShuf)
        glm_doit(self, filename2xShuf, None, pathname2xShuf, timeoutSecs=45)

        # too big to shuffle?
        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2xShuf,pathname2xShuf,pathname4x)
        glm_doit(self,filename4x, None, pathname4x, timeoutSecs=120)
Exemplo n.º 8
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)
        glm_doit(self,csvFilename, csvPathname, timeoutSecs=30)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_doit(self,filename2x, pathname2x, timeoutSecs=45)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2x,pathname2x,pathname4x)
        
        print "Iterating 3 times on this last one for perf compare"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_doit(self,filename4x, pathname4x, timeoutSecs=60)
Exemplo n.º 9
0
    def test_GLM_hastie(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        bucket = 'home-0xdiag-datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'standard' + '/' + csvFilename
        glm_doit(self, csvFilename, bucket, csvPathname, timeoutSecs=75)
        fullPathname = h2i.find_folder_and_filename(bucket,
                                                    csvPathname,
                                                    returnFullPath=True)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x, pathname1x, pathname2x)
        glm_doit(self, filename2x, None, pathname2x, timeoutSecs=75)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2x, pathname2x, pathname4x)

        print "Iterating 3 times on this last one for perf compare"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_doit(self, filename4x, None, pathname4x, timeoutSecs=150)
Exemplo n.º 10
0
    def test_1mx10_hastie_10_2_cat_and_shuffle(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)

        # This test also adds file shuffling, to see that row order doesn't matter
        csvFilename = "1mx10_hastie_10_2.data.gz"
        bucket = 'datasets'
        csvPathname = 'logreg' + '/' + csvFilename
        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)

        glm_doit(self, csvFilename, bucket, csvPathname, timeoutSecs=30)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)
        
        filename1xShuf = "hastie_1x.data_shuf"
        pathname1xShuf = SYNDATASETS_DIR + '/' + filename1xShuf
        h2o_util.file_shuffle(pathname1x, pathname1xShuf)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1xShuf, pathname1xShuf, pathname2x)

        filename2xShuf = "hastie_2x.data_shuf"
        pathname2xShuf = SYNDATASETS_DIR + '/' + filename2xShuf
        h2o_util.file_shuffle(pathname2x, pathname2xShuf)
        glm_doit(self, filename2xShuf, None, pathname2xShuf, timeoutSecs=45)

        # too big to shuffle?
        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2xShuf,pathname2xShuf,pathname4x)
        glm_doit(self,filename4x, None, pathname4x, timeoutSecs=120)
Exemplo n.º 11
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)
        glm_doit(self,csvFilename, csvPathname, timeoutSecs=75)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_doit(self,filename2x, pathname2x, timeoutSecs=75)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2x,pathname2x,pathname4x)
        
        print "Iterating 3 times on this last one for perf compare"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_doit(self,filename4x, pathname4x, timeoutSecs=150)
Exemplo n.º 12
0
    def test_GLM2_score_same(self):
        h2o.beta_features = True
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        bucket = 'home-0xdiag-datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'standard' + '/' + csvFilename

        y = "10"
        kwargs = {'response':  y, 'alpha': 0, 'family': 'gaussian'}
        (modelKey, validation1, parseResult) = glm_doit(self, csvFilename, bucket, csvPathname, 
            timeoutSecs=60, pollTimeoutSecs=60, **kwargs) 
        print "Use", modelKey, "model on 2x and 4x replications and compare results to 1x"

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x

        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        modelPathname = SYNDATASETS_DIR + '/model_' + filename2x
        bucket = None
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_score(self,filename2x, bucket, pathname2x, modelKey, modelPathname, timeoutSecs=60, pollTimeoutSecs=60)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        modelPathname = SYNDATASETS_DIR + '/model_' + filename4x
        h2o_util.file_cat(pathname2x, pathname2x, pathname4x)
        
        print "Iterating 3 times on this last one"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_score(self,filename4x, bucket, pathname4x, modelKey, modelPathname, timeoutSecs=60, pollTimeoutSecs=60)
Exemplo n.º 13
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)
        glm_doit(self,csvFilename, csvPathname, timeoutSecs=300)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_doit(self,filename2x, pathname2x, timeoutSecs=300)
Exemplo n.º 14
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = h2o.find_dataset('logreg' + '/' + csvFilename)
        glm_doit(self, csvFilename, csvPathname, timeoutSecs=300)

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(csvPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x, pathname1x, pathname2x)
        glm_doit(self, filename2x, pathname2x, timeoutSecs=300)
Exemplo n.º 15
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        bucket = 'home-0xdiag-datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'standard' + '/' + csvFilename
        glm_doit(self, csvFilename, bucket, csvPathname, timeoutSecs=300)

        fullPathname = h2i.find_folder_and_filename('home-0xdiag-datasets', csvPathname, returnFullPath=True)
        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_doit(self, filename2x, None, pathname2x, timeoutSecs=300)
Exemplo n.º 16
0
    def test_A_1mx10_hastie_10_2(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        # FIX! eventually we'll compare the 1x, 2x and 4x results like we do
        # in other tests. (catdata?)
        bucket = 'datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'logreg' + '/' + csvFilename
        glm_doit(self, csvFilename, bucket, csvPathname, timeoutSecs=300)

        fullPathname = h2i.find_folder_and_filename('datasets', csvPathname, returnFullPath=True)
        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_doit(self, filename2x, None, pathname2x, timeoutSecs=300)
Exemplo n.º 17
0
    def test_GLM_score_same(self):
        # gunzip it and cat it to create 2x and 4x replications in SYNDATASETS_DIR
        bucket = 'datasets'
        csvFilename = "1mx10_hastie_10_2.data.gz"
        csvPathname = 'logreg' + '/' + csvFilename

        y = "10"
        x = ""
        kwargs = {'x': x, 'y':  y, 'case': -1, 'thresholds': 0.5}
        (modelKey, validations1, parseResult) = glm_doit(self, csvFilename, bucket, csvPathname, 
            timeoutSecs=60, pollTimeoutSecs=60, **kwargs)

        print "Use", modelKey, "model on 2x and 4x replications and compare results to 1x"

        filename1x = "hastie_1x.data"
        pathname1x = SYNDATASETS_DIR + '/' + filename1x

        fullPathname = h2i.find_folder_and_filename(bucket, csvPathname, returnFullPath=True)
        h2o_util.file_gunzip(fullPathname, pathname1x)

        filename2x = "hastie_2x.data"
        pathname2x = SYNDATASETS_DIR + '/' + filename2x
        bucket = None
        h2o_util.file_cat(pathname1x,pathname1x,pathname2x)
        glm_score(self,filename2x, bucket, pathname2x, modelKey, thresholds="0.5",
            timeoutSecs=60, pollTimeoutSecs=60)

        filename4x = "hastie_4x.data"
        pathname4x = SYNDATASETS_DIR + '/' + filename4x
        h2o_util.file_cat(pathname2x, pathname2x, pathname4x)
        
        print "Iterating 3 times on this last one"
        for i in range(3):
            print "\nTrial #", i, "of", filename4x
            glm_score(self,filename4x, bucket, pathname4x, modelKey, thresholds="0.5",
                timeoutSecs=60, pollTimeoutSecs=60)