def javapredict_cars():
    """Run the GBM java-predict check on the cars dataset.

    Builds the GBM keyword arguments (``balance_classes`` is chosen at
    random on every run), prints them, loads ``cars_nice_header.csv`` once
    as the training frame and once as the test frame, and passes everything
    to ``tests.javapredict``.
    """
    # Randomise class balancing for this run.
    balance = random.sample([True, False], 1)[0]

    # optional parameters
    gbm_args = {
        'ntrees': 5000,
        'max_depth': 10,
        'min_rows': 1,
        'learn_rate': 0.1,
        'balance_classes': balance,
    }

    print("Parameter list:")
    for name, value in gbm_args.items():
        print("{0}, {1}".format(name, value))

    # Train and test frames are separate imports of the same CSV.
    train_path = tests.locate("smalldata/junit/cars_nice_header.csv")
    train_frame = h2o.import_file(train_path)
    test_path = tests.locate("smalldata/junit/cars_nice_header.csv")
    test_frame = h2o.import_file(test_path)

    predictors = [
        "name",
        "economy",
        "displacement",
        "power",
        "weight",
        "acceleration",
        "year",
    ]
    response = "cylinders"

    tests.javapredict("gbm", "numeric", train_frame, test_frame,
                      predictors, response, **gbm_args)
def javapredict_iris_drf():
    """Run the random-forest java-predict check on the iris training data.

    Prints the model parameters, imports ``iris_train.csv`` for both the
    training and the test frame, and calls ``tests.javapredict`` with the
    four sepal/petal columns as ``x`` and ``species`` as ``y``.
    """
    # optional parameters
    forest_args = {'ntrees': 100, 'max_depth': 5, 'min_rows': 10}

    print("Parameter list:")
    for name, value in forest_args.items():
        print("{0}, {1}".format(name, value))

    # Both frames come from the same file (iris_train.csv).
    train_frame = h2o.import_file(tests.locate("smalldata/iris/iris_train.csv"))
    test_frame = h2o.import_file(tests.locate("smalldata/iris/iris_train.csv"))

    predictors = ["sepal_len", "sepal_wid", "petal_len", "petal_wid"]
    response = "species"

    tests.javapredict("random_forest", "class", train_frame, test_frame,
                      predictors, response, **forest_args)
def javapredict_smallcat():
    """Run the random-forest java-predict check on two iris subsets.

    Trains on ``setosa_versicolor.csv`` and tests on ``virginica.csv``,
    passing columns 0, 1, 2 and 4 as ``x`` and column 3 as ``y`` to
    ``tests.javapredict``.
    """
    # optional parameters
    forest_args = {'ntrees': 100, 'max_depth': 5, 'min_rows': 10}

    print("Parameter list:")
    for name, value in forest_args.items():
        print("{0}, {1}".format(name, value))

    # NOTE(review): paths are resolved with h2o.locate here, while other
    # functions in this file use tests.locate -- confirm which is intended.
    train_path = h2o.locate("smalldata/iris/setosa_versicolor.csv")
    train_frame = h2o.upload_file(train_path)
    test_path = h2o.locate("smalldata/iris/virginica.csv")
    test_frame = h2o.upload_file(test_path)

    predictors = [0, 1, 2, 4]
    response = 3

    tests.javapredict("random_forest", "numeric", train_frame, test_frame,
                      predictors, response, **forest_args)
def javapredict_smallcat():
    """Run the deep-learning java-predict check on two iris subsets.

    Trains on ``setosa_versicolor.csv`` and tests on ``virginica.csv``,
    passing columns 0, 1, 2 and 4 as ``x`` and column 3 as ``y`` to
    ``tests.javapredict``.
    """
    # optional parameters
    dl_args = {'epochs': 100}

    print("Parameter list:")
    for name, value in dl_args.items():
        print("{0}, {1}".format(name, value))

    train_path = tests.locate("smalldata/iris/setosa_versicolor.csv")
    train_frame = h2o.upload_file(train_path)
    test_path = tests.locate("smalldata/iris/virginica.csv")
    test_frame = h2o.upload_file(test_path)

    predictors = [0, 1, 2, 4]
    response = 3

    tests.javapredict("deeplearning", "numeric", train_frame, test_frame,
                      predictors, response, **dl_args)
def javapredict_cars():
    """Run the GBM java-predict check on the cars dataset.

    Prints the GBM keyword arguments (``balance_classes`` is chosen at
    random on every run), imports ``cars_nice_header.csv`` for both the
    training and the test frame, and calls ``tests.javapredict``.
    """
    # Randomise class balancing for this run.
    balance = random.sample([True, False], 1)[0]

    # optional parameters
    gbm_args = {
        'ntrees': 5000,
        'max_depth': 10,
        'min_rows': 1,
        'learn_rate': 0.1,
        'balance_classes': balance,
    }

    print("Parameter list:")
    for name, value in gbm_args.items():
        print("{0}, {1}".format(name, value))

    # NOTE(review): paths are resolved with h2o.locate here, while other
    # functions in this file use tests.locate -- confirm which is intended.
    # Train and test frames are separate imports of the same CSV.
    train_frame = h2o.import_file(
        h2o.locate("smalldata/junit/cars_nice_header.csv"))
    test_frame = h2o.import_file(
        h2o.locate("smalldata/junit/cars_nice_header.csv"))

    predictors = ["name", "economy", "displacement", "power", "weight",
                  "acceleration", "year"]
    response = "cylinders"

    tests.javapredict("gbm", "numeric", train_frame, test_frame,
                      predictors, response, **gbm_args)
def javapredict_2x100000():
    """Run the GLM java-predict check on ``2x100000_real.csv.gz``.

    The imported frame is used for both training and testing. Column 0 is
    passed as ``y`` and columns 1 up to ``ncol - 1`` as ``x``.
    """
    # optional parameters
    glm_args = {"max_iterations": 1, "solver": "L_BFGS"}

    print("Parameter list:")
    for name, value in glm_args.items():
        print("{0}, {1}".format(name, value))

    data_path = tests.locate("smalldata/jira/2x100000_real.csv.gz")
    frame = h2o.import_file(data_path)

    # Every column except the first one.
    predictors = range(1, frame.ncol)
    response = 0

    # The same frame object serves as train and test.
    tests.javapredict("glm", "numeric", frame, frame,
                      predictors, response, **glm_args)
def javapredict_smallcat():
    """Run the random-forest java-predict check on two iris subsets.

    Uploads ``setosa_versicolor.csv`` as the training frame and
    ``virginica.csv`` as the test frame, then calls ``tests.javapredict``
    with columns 0, 1, 2 and 4 as ``x`` and column 3 as ``y``.
    """
    # optional parameters
    rf_kwargs = {'ntrees': 100, 'max_depth': 5, 'min_rows': 10}

    print("Parameter list:")
    for key, val in rf_kwargs.items():
        print("{0}, {1}".format(key, val))

    training = h2o.upload_file(
        tests.locate("smalldata/iris/setosa_versicolor.csv"))
    holdout = h2o.upload_file(tests.locate("smalldata/iris/virginica.csv"))

    x_cols = [0, 1, 2, 4]
    y_col = 3

    tests.javapredict("random_forest", "numeric", training, holdout,
                      x_cols, y_col, **rf_kwargs)
def javapredict_smallcat():
    """Run the deep-learning java-predict check on two iris subsets.

    Uploads ``setosa_versicolor.csv`` as the training frame and
    ``virginica.csv`` as the test frame, then calls ``tests.javapredict``
    with columns 0, 1, 2 and 4 as ``x`` and column 3 as ``y``.
    """
    # optional parameters
    dl_kwargs = {'epochs': 100}

    print("Parameter list:")
    for key, val in dl_kwargs.items():
        print("{0}, {1}".format(key, val))

    training = h2o.upload_file(
        tests.locate("smalldata/iris/setosa_versicolor.csv"))
    holdout = h2o.upload_file(tests.locate("smalldata/iris/virginica.csv"))

    x_cols = [0, 1, 2, 4]
    y_col = 3

    tests.javapredict("deeplearning", "numeric", training, holdout,
                      x_cols, y_col, **dl_kwargs)
def javapredict_dl_xlarge():
    """Run the deep-learning java-predict check on an HDFS-hosted dataset.

    Imports ``/datasets/z_repro.csv`` from the name node returned by
    ``tests.hadoop_namenode()``, trains on the full frame, and tests on its
    first ten rows. Column 0 is passed as ``y`` and the remaining columns
    as ``x``.
    """
    name_node = tests.hadoop_namenode()
    dataset_path = "/datasets/z_repro.csv"
    hdfs_url = "hdfs://{0}{1}".format(name_node, dataset_path)

    dl_args = {'hidden': [3500, 3500], 'epochs': 0.0001}  # 436MB pojo

    print("Parameter list:")
    for name, value in dl_args.items():
        print("{0}, {1}".format(name, value))

    full_frame = h2o.import_file(hdfs_url)
    # Test frame: rows 0-9, all columns.
    head_rows = full_frame[range(0, 10), :]

    predictors = range(1, full_frame.ncol)
    response = 0

    tests.javapredict("deeplearning", "numeric", full_frame, head_rows,
                      predictors, response, **dl_args)
def javapredict_drf_xlarge():
    """Run the random-forest java-predict check on an HDFS-hosted dataset.

    Imports ``/datasets/z_repro.csv`` from the name node returned by
    ``tests.hadoop_namenode()``, trains on the full frame, and tests on its
    first ten rows. Column 0 is passed as ``y`` and the remaining columns
    as ``x``.
    """
    name_node = tests.hadoop_namenode()
    dataset_path = "/datasets/z_repro.csv"
    hdfs_url = "hdfs://{0}{1}".format(name_node, dataset_path)

    forest_args = {'ntrees': 20, 'max_depth': 35, 'min_rows': 1}  # 739MB pojo

    print("Parameter list:")
    for name, value in forest_args.items():
        print("{0}, {1}".format(name, value))

    full_frame = h2o.import_file(hdfs_url)
    # Test frame: rows 0-9, all columns.
    head_rows = full_frame[range(0, 10), :]

    predictors = range(1, full_frame.ncol)
    response = 0

    tests.javapredict("random_forest", "numeric", full_frame, head_rows,
                      predictors, response, **forest_args)
def javapredict_gbm_xlarge():
    """Run the GBM java-predict check on an HDFS-hosted dataset.

    Imports ``/datasets/z_repro.csv.gz`` from the name node returned by
    ``tests.hadoop_namenode()``, trains on the full frame, and tests on its
    first ten rows. Column 0 is passed as ``y`` and the remaining columns
    as ``x``.
    """
    name_node = tests.hadoop_namenode()
    dataset_path = "/datasets/z_repro.csv.gz"
    hdfs_url = "hdfs://{0}{1}".format(name_node, dataset_path)

    # 651MB pojo
    gbm_args = {
        "ntrees": 22,
        "max_depth": 37,
        "min_rows": 1,
        "sample_rate": 0.1,
    }

    print("Parameter list:")
    for name, value in gbm_args.items():
        print("{0}, {1}".format(name, value))

    full_frame = h2o.import_file(hdfs_url)
    # Test frame: rows 0-9, all columns.
    head_rows = full_frame[range(0, 10), :]

    predictors = range(1, full_frame.ncol)
    response = 0

    tests.javapredict("gbm", "numeric", full_frame, head_rows,
                      predictors, response, **gbm_args)