def parseS3File(self, s3bucket, filename, **kwargs):
    """Parse a file from an S3 bucket and record how long the call took.

    Delegates to ``h2o_cmd.parseS3File`` with ``bucket=s3bucket`` and
    ``filename=filename``, forwarding any extra keyword arguments untouched.
    The wall-clock duration of that call (seconds, from ``time.time()``) is
    reported through ``h2o.verboseprint`` and stored on the returned object
    under the ``'python_call_timer'`` key.

    Returns whatever ``h2o_cmd.parseS3File`` returned, with the timing
    entry added.
    """
    began = time.time()
    result = h2o_cmd.parseS3File(bucket=s3bucket, filename=filename, **kwargs)
    elapsed = time.time() - began

    h2o.verboseprint("py-S3 parse took {0} sec".format(elapsed))
    # Attach the client-side timing so callers can read it off the result.
    result['python_call_timer'] = elapsed
    return result
def parseS3File(self, s3bucket, filename, **kwargs):
    """Run an S3 parse through ``h2o_cmd`` and time it from the Python side.

    ``s3bucket`` and ``filename`` are passed on as the ``bucket`` and
    ``filename`` keyword arguments of ``h2o_cmd.parseS3File``; any other
    keyword arguments are forwarded as-is.

    The elapsed wall-clock time of the call (seconds) is printed with
    ``h2o.verboseprint`` and written into the parse result under
    ``'python_call_timer'`` before the result is returned.
    """
    t0 = time.time()
    parsed = h2o_cmd.parseS3File(bucket=s3bucket, filename=filename, **kwargs)
    duration = time.time() - t0

    h2o.verboseprint("py-S3 parse took {0} sec".format(duration))
    # Keep the measured duration alongside the parse result.
    parsed['python_call_timer'] = duration
    return parsed
def test_RF_1000trees(self):
    """Parse a covtype dataset from S3, run RF on it, and print timings.

    The dataset is read from the bucket returned by
    ``self.s3_default_bucket()`` via ``h2o_cmd.parseS3File``; the parse
    result is then handed to ``h2o_cmd.runRF``. The elapsed time of each
    step and the output of ``h2o_rf.pp_rf_result`` are printed.

    NOTE(review): the test name says 1000 trees but ``ntree`` is 100 --
    confirm which one is intended.
    """
    # NAs cause CM to zero..don't run for now
    ### csvPathnamegz = h2o.find_file('smalldata/hhp_9_17_12.predict.100rows.data.gz')
    s3bucket = self.s3_default_bucket()

    # Only one dataset is actually used. The other names that used to be
    # assigned (and immediately overwritten) here are kept for reference:
    #   'covtype20x.data.gz', 'covtype200x.data.gz', 'covtype50x.data',
    #   'covtype100x.data', 'covtype.20k.data'
    s3dataset = 'covtype.data'

    # The same timeout is applied to both the parse and the RF run.
    timeout_secs = 14800

    start = time.time()
    parseResult = h2o_cmd.parseS3File(bucket=s3bucket, filename=s3dataset, timeoutSecs=timeout_secs)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Parsing took {0}".format(time.time() - start))

    start = time.time()
    rf_train = h2o_cmd.runRF(
        parseResult=parseResult,
        ntree=100,
        timeoutSecs=timeout_secs,
        bin_limit=20000,
        out_of_bag_error_estimate=1,
        stat_type='ENTROPY',
        depth=100,
        exclusive_split_limit=0,
    )
    print("Computation took {0} sec".format(time.time() - start))
    print(h2o_rf.pp_rf_result(rf_train))
def test_RF_1000trees(self):
    """Parse a covtype dataset from S3, run RF on it, and print timings.

    The dataset is read from the bucket returned by
    ``self.s3_default_bucket()`` via ``h2o_cmd.parseS3File``; the resulting
    parse key is then handed to ``h2o_cmd.runRFOnly``. The elapsed time of
    each step and the output of ``h2o_rf.pp_rf_result`` are printed.

    NOTE(review): the test name says 1000 trees but ``ntree`` is 100 --
    confirm which one is intended.
    """
    # NAs cause CM to zero..don't run for now
    ### csvPathnamegz = h2o.find_file('smalldata/hhp_9_17_12.predict.100rows.data.gz')
    s3bucket = self.s3_default_bucket()

    # Only one dataset is actually used. The other names that used to be
    # assigned (and immediately overwritten) here are kept for reference:
    #   "covtype20x.data.gz", "covtype200x.data.gz", "covtype50x.data",
    #   "covtype100x.data", "covtype.20k.data"
    s3dataset = "covtype.data"

    # The same timeout is applied to both the parse and the RF run.
    timeout_secs = 14800

    start = time.time()
    parseKey = h2o_cmd.parseS3File(bucket=s3bucket, filename=s3dataset, timeoutSecs=timeout_secs)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Parsing took {0}".format(time.time() - start))

    start = time.time()
    rf_train = h2o_cmd.runRFOnly(
        parseKey=parseKey,
        ntree=100,
        timeoutSecs=timeout_secs,
        bin_limit=20000,
        out_of_bag_error_estimate=1,
        gini=0,
        depth=100,
        exclusive_split_limit=0,
    )
    print("Computation took {0} sec".format(time.time() - start))
    print(h2o_rf.pp_rf_result(rf_train))