Exemplo n.º 1
0
    def test_GLM2_princeton(self):
        """Fit a GLM on each Princeton dataset and run the basic GLM check.

        For every case the raw file is located under ``smalldata``, a copy
        with trailing spaces stripped is written into ``SYNDATASETS_DIR``,
        that copy is uploaded and parsed, and a GLM (``n_folds`` set to 0) is
        fitted and handed to ``h2o_glm.simpleCheckGLM``.
        """
        # Each case is (filename, family, response column, timeoutSecs).
        # NOTE(review): the original author questioned using gaussian here,
        # since these datasets all appear to hold counts.
        csvFilenameList = [
            ('cuse.dat', 'gaussian', 3, 10), # notUsing
            ('cuse.dat', 'gaussian', 4, 10), # using
            ('copen.dat', 'gaussian', 4, 10),
            ('housing.raw', 'gaussian', 4, 10),
            ]

        # enumerate(..., 1) keeps the 1-based trial number in step with the
        # loop instead of maintaining a separate counter by hand.
        for trial, (csvFilename, family, y, timeoutSecs) in enumerate(csvFilenameList, 1):
            csvPathname1 = 'logreg/princeton/' + csvFilename
            fullPathname1 = h2i.find_folder_and_filename('smalldata', csvPathname1, returnFullPath=True)
            csvPathname2 = SYNDATASETS_DIR + '/' + csvFilename + '_stripped.csv'
            h2o_util.file_strip_trailing_spaces(fullPathname1, csvPathname2)

            parseResult = h2i.import_parse(path=csvPathname2, schema='put', timeoutSecs=timeoutSecs)

            # The timer starts after the parse, so it covers the GLM run and
            # the check only.
            start = time.time()
            kwargs = {'n_folds': 0, 'family': family, 'response': y}
            glm = h2o_cmd.runGLM(parseResult=parseResult, timeoutSecs=timeoutSecs, **kwargs)
            h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)
            elapsed = time.time() - start

            # Single-argument print(...) produces the same text as the old
            # comma-separated print statement (including the double space after
            # "on") while also being valid syntax on Python 3.
            print("glm end (w/check) on  %s took %s seconds" % (csvPathname2, elapsed))
            print("\nTrial # %s" % trial)
Exemplo n.º 2
0
    def test_GLM_princeton(self):
        """Fit a GLM on each Princeton dataset and run the basic GLM check.

        For every case the raw file is located with ``h2o.find_file``, a copy
        with trailing spaces stripped is written into ``SYNDATASETS_DIR``, and
        a GLM (``n_folds`` set to 0, link ``'familyDefault'``) is run directly
        on that copy, using the filename as the key, before being handed to
        ``h2o_glm.simpleCheckGLM``.
        """
        # Each case is (filename, family, response column, timeoutSecs).
        # NOTE(review): the original author questioned using gaussian here,
        # since these datasets all appear to hold counts.
        csvFilenameList = [
            ('cuse.dat', 'gaussian', 3, 5),  # notUsing
            ('cuse.dat', 'gaussian', 4, 5),  # using
            ('copen.dat', 'gaussian', 4, 5),
            ('housing.raw', 'gaussian', 4, 5),
        ]

        # enumerate(..., 1) keeps the 1-based trial number in step with the
        # loop instead of maintaining a separate counter by hand.
        for trial, (csvFilename, family, y,
                    timeoutSecs) in enumerate(csvFilenameList, 1):
            csvPathname1 = h2o.find_file("smalldata/logreg/princeton/" +
                                         csvFilename)
            csvPathname2 = SYNDATASETS_DIR + '/' + csvFilename + '_stripped.csv'
            h2o_util.file_strip_trailing_spaces(csvPathname1, csvPathname2)

            kwargs = {
                'n_folds': 0,
                'family': family,
                'link': 'familyDefault',
                'y': y
            }
            # The timer covers the GLM run and the check that follows it.
            start = time.time()
            glm = h2o_cmd.runGLM(csvPathname=csvPathname2,
                                 key=csvFilename,
                                 timeoutSecs=timeoutSecs,
                                 **kwargs)
            h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)
            elapsed = time.time() - start

            # Single-argument print(...) produces the same text as the old
            # comma-separated print statement (including the double space after
            # "on") while also being valid syntax on Python 3.
            print("glm end (w/check) on  %s took %s seconds" %
                  (csvPathname2, elapsed))
            print("\nTrial # %s" % trial)
Exemplo n.º 3
0
    def test_GLM_princeton(self):
        """Fit a GLM on each Princeton dataset and run the basic GLM check.

        For every case the raw file is located with ``h2o.find_file``, a copy
        with trailing spaces stripped is written into ``SYNDATASETS_DIR``, and
        a GLM (``n_folds`` set to 0, link ``"familyDefault"``) is run directly
        on that copy, using the filename as the key, before being handed to
        ``h2o_glm.simpleCheckGLM``.
        """
        # Each case is (filename, family, response column, timeoutSecs).
        # NOTE(review): the original author questioned using gaussian here,
        # since these datasets all appear to hold counts.
        csvFilenameList = [
            ("cuse.dat", "gaussian", 3, 5),  # notUsing
            ("cuse.dat", "gaussian", 4, 5),  # using
            ("copen.dat", "gaussian", 4, 5),
            ("housing.raw", "gaussian", 4, 5),
        ]

        # enumerate(..., 1) keeps the 1-based trial number in step with the
        # loop instead of maintaining a separate counter by hand.
        for trial, (csvFilename, family, y, timeoutSecs) in enumerate(csvFilenameList, 1):
            csvPathname1 = h2o.find_file("smalldata/logreg/princeton/" + csvFilename)
            csvPathname2 = SYNDATASETS_DIR + "/" + csvFilename + "_stripped.csv"
            h2o_util.file_strip_trailing_spaces(csvPathname1, csvPathname2)

            kwargs = {"n_folds": 0, "family": family, "link": "familyDefault", "y": y}
            # The timer covers the GLM run and the check that follows it.
            start = time.time()
            glm = h2o_cmd.runGLM(csvPathname=csvPathname2, key=csvFilename, timeoutSecs=timeoutSecs, **kwargs)
            h2o_glm.simpleCheckGLM(self, glm, None, **kwargs)
            elapsed = time.time() - start

            # Single-argument print(...) produces the same text as the old
            # comma-separated print statement (including the double space after
            # "on") while also being valid syntax on Python 3.
            print("glm end (w/check) on  %s took %s seconds" % (csvPathname2, elapsed))
            print("\nTrial # %s" % trial)