def runComparisonTests(autoEncoder, actFun, missingValuesHandling, setAllFactor, train, test, x):
    """Build a deeplearning model and compare its H2O, MOJO and POJO predictions.

    :param autoEncoder: when truthy, a build failure whose message mentions an
        unstable model is tolerated and the comparison is skipped.
    :param actFun: forwarded to ``set_params``.
    :param missingValuesHandling: forwarded to ``set_params``.
    :param setAllFactor: forwarded to ``set_params``.
    :param train: frame handed to ``build_save_model``.
    :param test: frame whose ``x`` columns are written to ``in.csv`` for scoring.
    :param x: predictor column selection.
    :raises Exception: if the autoencoder build fails for any other reason.
    """
    model_params = set_params(actFun, missingValuesHandling, setAllFactor, autoEncoder)

    if autoEncoder:
        try:
            model = build_save_model(model_params, x, train)
        except Exception as err:
            is_unstable = "Trying to predict with an unstable model" in err.args[0]
            if not is_unstable:
                raise Exception('Deeplearning autoencoder model failed to build. Fix it.')
            # An unstable autoencoder is an accepted outcome: nothing to compare.
            return
    else:
        model = build_save_model(model_params, x, train)

    # The H2O and MOJO/POJO scorers all read the same exported test file.
    scoring_input = test[x]
    h2o.download_csv(scoring_input, os.path.join(TMPDIR, 'in.csv'))

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(model, TMPDIR, MOJONAME)

    # Keep a copy of the model next to the artifacts for debugging.
    h2o.save_model(model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)
def glm_fractional_binomial_mojo_pojo():
    """Check that H2O, MOJO and POJO predictions agree for a GLM model.

    The model is built by ``pyunit_utils.build_save_model_GLM`` from the
    fractional-binomial sample file, using ``log10conc`` as the only predictor
    and ``y`` as the response.  The same file is imported for both the training
    and the test frame.
    """
    params = set_params()  # parameters handed to the GLM builder below
    train = h2o.import_file(
        pyunit_utils.locate("smalldata/glm_test/fraction_binommialOrig.csv"))
    test = h2o.import_file(
        pyunit_utils.locate("smalldata/glm_test/fraction_binommialOrig.csv"))
    x = ["log10conc"]
    y = "y"

    # build and save mojo model
    glmModel = pyunit_utils.build_save_model_GLM(params, x, train, y)
    MOJONAME = pyunit_utils.getMojoName(glmModel._id)
    # NOTE: '__file__' is a string literal here, so realpath() resolves it
    # against the current working directory rather than this module's location.
    TMPDIR = os.path.normpath(
        os.path.join(os.path.dirname(os.path.realpath('__file__')),
                     "..", "results", MOJONAME))

    # save test file, h2o predict/mojo use same file
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    # load model and perform predict
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(glmModel, TMPDIR, MOJONAME)
    h2o.download_csv(pred_h2o, os.path.join(TMPDIR, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(glmModel, TMPDIR, MOJONAME)

    # Drop index 3 from the H2O predictions before comparing (presumably a
    # column the MOJO output does not carry -- TODO confirm).
    pred_h2o = pred_h2o.drop(3)

    print("Comparing mojo predict and h2o predict...")
    # make sure operation sequence is preserved from Tomk
    # 0.1 is passed positionally; presumably the fraction of entries that get
    # compared -- TODO confirm against pyunit_utils.compare_frames_local.
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)

    # Save the model for debugging.  The original passed `glmOrdinalModel`,
    # a name that is never bound in this function; save the model built here.
    h2o.save_model(glmModel, path=TMPDIR, force=True)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def glm_binomial_mojo_pojo():
    """Check that H2O, MOJO and POJO predictions agree for a binomial GLM.

    A random dataset is generated, its first ``NTESTROWS`` rows are held out
    as the test set and the remainder is used for training.  Artifacts are
    written to a fresh temporary directory, which is not removed afterwards.
    """
    h2o.remove_all()
    NTESTROWS = 200  # number of test dataset rows
    PROBLEM = "binomial"

    params = set_params()  # parameters handed to the GLM builder below
    df = pyunit_utils.random_dataset(PROBLEM)  # generate random dataset
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    x = list(set(df.names) - {"response"})

    TMPDIR = tempfile.mkdtemp()
    # build and save mojo model
    glmBinomialModel = pyunit_utils.build_save_model_generic(
        params, x, train, "response", "glm", TMPDIR)
    MOJONAME = pyunit_utils.getMojoName(glmBinomialModel._id)

    # save test file, h2o predict/mojo use same file
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    # load model and perform predict
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        glmBinomialModel, TMPDIR, MOJONAME)
    h2o.download_csv(pred_h2o, os.path.join(TMPDIR, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(glmBinomialModel, TMPDIR, MOJONAME)

    print("Comparing mojo predict and h2o predict...")
    # make sure operation sequence is preserved from Tomk
    # 0.1 is passed positionally; presumably the fraction of entries that get
    # compared -- TODO confirm against pyunit_utils.compare_frames_local.
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)

    # Save the model for debugging.  The original passed `glmOrdinalModel`,
    # a name that is never bound in this function; save the model built here.
    h2o.save_model(glmBinomialModel, path=TMPDIR, force=True)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def run_comparison_tests(auto_encoder, act_fun, missing_values_handling, set_all_factor, train, test, x):
    """Build a deeplearning model and compare its H2O, MOJO and POJO predictions.

    :param auto_encoder: when truthy, a build failure whose message mentions an
        unstable model is tolerated and the comparison is skipped.
    :param act_fun: forwarded to ``set_params``.
    :param missing_values_handling: forwarded to ``set_params``.
    :param set_all_factor: forwarded to ``set_params``.
    :param train: frame handed to ``build_save_model``.
    :param test: frame whose ``x`` columns are written to ``in.csv`` for scoring.
    :param x: predictor column selection.
    :raises Exception: if the autoencoder build fails for any other reason.
    """
    settings = set_params(act_fun, missing_values_handling, set_all_factor, auto_encoder)

    if auto_encoder:
        try:
            dl_model = build_save_model(settings, x, train)
        except Exception as err:
            if "Trying to predict with an unstable model" not in err.args[0]:
                raise Exception('Deeplearning autoencoder model failed to build. Fix it.')
            # Unstable autoencoders are tolerated; there is nothing to compare.
            return
    else:
        dl_model = build_save_model(settings, x, train)

    # One exported test file feeds every scorer.
    test_predictors = test[x]
    h2o.download_csv(test_predictors, os.path.join(TMPDIR, 'in.csv'))

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(dl_model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(dl_model, TMPDIR, MOJONAME)

    # Persist the model alongside the artifacts for later debugging.
    h2o.save_model(dl_model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)
def glm_ordinal_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions for a model built on random data.

    The first ``NTESTROWS`` rows of the generated dataset form the test set and
    the rest the training set.  Any exception raised while building, scoring or
    comparing is printed; the process exits with status 1 only when the
    exception type's string form contains ``AssertionError``.  All other
    errors are deliberately ignored.
    """
    h2o.remove_all()
    params = set_params()
    df = random_dataset(PROBLEM)

    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    try:
        ordinal_model = build_save_model(params, x, train, "response")

        # Every scorer reads the same exported test file.
        h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
        pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
            ordinal_model, TMPDIR, MOJONAME)
        h2o.download_csv(pred_h2o, os.path.join(TMPDIR, "h2oPred.csv"))
        pred_pojo = pyunit_utils.pojo_predict(ordinal_model, TMPDIR, MOJONAME)

        print("Comparing mojo predict and h2o predict...")
        # make sure operation sequence is preserved from Tomk
        pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)

        # Keep a copy of the model for debugging.
        h2o.save_model(ordinal_model, path=TMPDIR, force=True)

        # NOTE: despite the message, this step compares the MOJO and POJO frames.
        print("Comparing pojo predict and h2o predict...")
        pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
    except Exception as ex:
        type_text = str(type(ex))
        print("*************** ERROR and type is ")
        print(type_text)
        print(ex)
        # Only assertion failures fail the test; everything else is ignored.
        if "AssertionError" in type_text:
            sys.exit(1)
def glm_multinomial_mojo_pojo():
    """Check that H2O, MOJO and POJO predictions agree for a multinomial GLM.

    A random dataset is generated, its first ``NTESTROWS`` rows are held out
    as the test set and the remainder is used for training.
    """
    PROBLEM = "multinomial"
    NTESTROWS = 200  # number of test dataset rows

    params = set_params()  # parameters handed to the GLM builder below
    df = pyunit_utils.random_dataset(PROBLEM)  # generate random dataset
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    x = list(set(df.names) - {"response"})

    # build and save mojo model
    glmMultinomialModel = pyunit_utils.build_save_model_GLM(
        params, x, train, "response")
    MOJONAME = pyunit_utils.getMojoName(glmMultinomialModel._id)
    # NOTE: '__file__' is a string literal here, so realpath() resolves it
    # against the current working directory rather than this module's location.
    TMPDIR = os.path.normpath(
        os.path.join(os.path.dirname(os.path.realpath('__file__')),
                     "..", "results", MOJONAME))

    # save test file, h2o predict/mojo use same file
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    # load model and perform predict
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        glmMultinomialModel, TMPDIR, MOJONAME)
    h2o.download_csv(pred_h2o, os.path.join(TMPDIR, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(glmMultinomialModel, TMPDIR, MOJONAME)

    print("Comparing mojo predict and h2o predict...")
    # make sure operation sequence is preserved from Tomk
    # 0.1 is passed positionally; presumably the fraction of entries that get
    # compared -- TODO confirm against pyunit_utils.compare_frames_local.
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)

    # Save the model for debugging.  The original passed `glmOrdinalModel`,
    # a name that is never bound in this function; save the model built here.
    h2o.save_model(glmMultinomialModel, path=TMPDIR, force=True)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def deeplearning_mojo_pojo():
    """Compare H2O, MOJO and POJO predictions of a deeplearning model.

    The first ``NTESTROWS`` rows of the generated dataset form the test set and
    the rest the training set.  Any exception raised while building, scoring or
    comparing is printed; the process exits with status 1 only when the
    exception type's string form contains ``AssertionError``.  All other
    errors are deliberately ignored.
    """
    h2o.remove_all()
    params = set_params()
    df = random_dataset(PROBLEM)

    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    try:
        dl_model = build_save_model(params, x, train)

        # Every scorer reads the same exported test file.
        h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
        pred_h2o, pred_mojo = pyunit_utils.mojo_predict(dl_model, TMPDIR, MOJONAME)
        pred_pojo = pyunit_utils.pojo_predict(dl_model, TMPDIR, MOJONAME)

        # Keep a copy of the model for debugging.
        h2o.save_model(dl_model, path=TMPDIR, force=True)

        print("Comparing mojo predict and h2o predict...")
        # make sure operation sequence is preserved from Tomk
        pyunit_utils.compare_numeric_frames(pred_h2o, pred_mojo, 0.1, tol=1e-10)

        # NOTE: despite the message, this step compares the MOJO and POJO frames.
        print("Comparing pojo predict and h2o predict...")
        pyunit_utils.compare_numeric_frames(pred_mojo, pred_pojo, 0.1, tol=1e-10)
    except Exception as ex:
        type_text = str(type(ex))
        print("*************** ERROR and type is ")
        print(type_text)
        print(ex)
        # Only assertion failures fail the test; everything else is ignored.
        if "AssertionError" in type_text:
            sys.exit(1)
def runComparisonTests(autoEncoder, probleyType):
    """Build a deeplearning model on random data and compare its predictions.

    :param autoEncoder: forwarded to ``set_params``.
    :param probleyType: forwarded to ``random_dataset`` to generate the data.

    The first ``NTESTROWS`` rows of the generated dataset form the test set and
    the rest the training set.  H2O, MOJO and POJO predictions are then
    compared pairwise with ``tol=1e-10``.
    """
    model_params = set_params(autoEncoder)
    df = random_dataset(probleyType)

    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    model = build_save_model(model_params, x, train)

    # Every scorer reads the same exported test file.
    scoring_input = test[x]
    h2o.download_csv(scoring_input, os.path.join(TMPDIR, 'in.csv'))

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(model, TMPDIR, MOJONAME)

    # Keep a copy of the model for debugging.
    h2o.save_model(model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)
def compare_preds(train, test, x, y, booster, ntrees, max_depth, max_error):
    """Train an XGBoost model and compare its H2O predictions with POJO output.

    :param train: training frame.
    :param test: frame whose ``x`` columns are scored.
    :param x: predictor column selection.
    :param y: response column.
    :param booster: booster name passed to ``H2OXGBoostEstimator``.
    :param ntrees: number of trees passed to the estimator.
    :param max_depth: maximum tree depth passed to the estimator.
    :param max_error: tolerance passed as ``tol`` to the frame comparison.
    """
    xgb = H2OXGBoostEstimator(
        booster=booster, seed=1, ntrees=ntrees, max_depth=max_depth)
    xgb.train(training_frame=train, x=x, y=y)

    mojo_name = pyunit_utils.getMojoName(xgb._id)
    # NOTE: '__file__' is a string literal here, so realpath() resolves it
    # against the current working directory rather than this module's location.
    base_dir = os.path.dirname(os.path.realpath('__file__'))
    tmp_dir = os.path.normpath(
        os.path.join(base_dir, "..", "results", mojo_name))
    os.makedirs(tmp_dir)
    xgb.download_mojo(path=tmp_dir)

    # Export the scoring input, then the in-cluster predictions.
    h2o.download_csv(test[x], os.path.join(tmp_dir, 'in.csv'))
    pred_h2o = xgb.predict(test[x])
    h2o.download_csv(pred_h2o, os.path.join(tmp_dir, "out_h2o.csv"))

    pred_pojo = pyunit_utils.pojo_predict(xgb, tmp_dir, mojo_name)

    banner = "%s: Comparing pojo %s predict and h2o predict..." % (xgb._id, booster)
    print(banner)
    pyunit_utils.compare_frames_local(pred_h2o, pred_pojo, 1, tol=max_error)
def glm_multinomial_mojo_pojo():
    """Check that H2O, MOJO and POJO predictions agree for a multinomial GLM.

    A random dataset is generated, its first ``NTESTROWS`` rows are held out
    as the test set and the remainder is used for training.
    """
    PROBLEM = "multinomial"
    NTESTROWS = 200  # number of test dataset rows

    params = set_params()  # parameters handed to the GLM builder below
    df = pyunit_utils.random_dataset(PROBLEM)  # generate random dataset
    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    x = list(set(df.names) - {"response"})

    # build and save mojo model
    glmMultinomialModel = pyunit_utils.build_save_model_GLM(
        params, x, train, "response")
    MOJONAME = pyunit_utils.getMojoName(glmMultinomialModel._id)
    # NOTE: '__file__' is a string literal here, so realpath() resolves it
    # against the current working directory rather than this module's location.
    TMPDIR = os.path.normpath(
        os.path.join(os.path.dirname(os.path.realpath('__file__')),
                     "..", "results", MOJONAME))

    # save test file, h2o predict/mojo use same file
    h2o.download_csv(test[x], os.path.join(TMPDIR, 'in.csv'))
    # load model and perform predict
    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(
        glmMultinomialModel, TMPDIR, MOJONAME)
    h2o.download_csv(pred_h2o, os.path.join(TMPDIR, "h2oPred.csv"))
    pred_pojo = pyunit_utils.pojo_predict(glmMultinomialModel, TMPDIR, MOJONAME)

    print("Comparing mojo predict and h2o predict...")
    # make sure operation sequence is preserved from Tomk
    # 0.1 is passed positionally; presumably the fraction of entries that get
    # compared -- TODO confirm against pyunit_utils.compare_frames_local.
    pyunit_utils.compare_frames_local(pred_h2o, pred_mojo, 0.1, tol=1e-10)

    # Save the model for debugging.  The original passed `glmOrdinalModel`,
    # a name that is never bound in this function; save the model built here.
    h2o.save_model(glmMultinomialModel, path=TMPDIR, force=True)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local(pred_mojo, pred_pojo, 0.1, tol=1e-10)
def runComparisonTests(autoEncoder, probleyType):
    """Build a deeplearning model on random data and compare its predictions.

    :param autoEncoder: forwarded to ``set_params``; when truthy, a build
        failure whose message mentions an unstable model is tolerated and the
        comparison is skipped.
    :param probleyType: forwarded to ``random_dataset`` to generate the data.
    :raises Exception: if the autoencoder build fails for any other reason.

    The first ``NTESTROWS`` rows of the generated dataset form the test set and
    the rest the training set.
    """
    model_params = set_params(autoEncoder)
    df = random_dataset(probleyType)

    train = df[NTESTROWS:, :]
    test = df[:NTESTROWS, :]
    predictor_names = set(df.names) - {"response"}
    x = list(predictor_names)

    if autoEncoder:
        try:
            model = build_save_model(model_params, x, train)
        except Exception as err:
            if "Trying to predict with an unstable model" not in err.args[0]:
                raise Exception('Deeplearning autoencoder model failed to build. Fix it.')
            # An unstable autoencoder is an accepted outcome: nothing to compare.
            return
    else:
        model = build_save_model(model_params, x, train)

    # Every scorer reads the same exported test file.
    scoring_input = test[x]
    h2o.download_csv(scoring_input, os.path.join(TMPDIR, 'in.csv'))

    pred_h2o, pred_mojo = pyunit_utils.mojo_predict(model, TMPDIR, MOJONAME)
    pred_pojo = pyunit_utils.pojo_predict(model, TMPDIR, MOJONAME)

    # Keep a copy of the model for debugging.
    h2o.save_model(model, path=TMPDIR, force=True)

    print("Comparing mojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_h2o, pred_mojo, prob=1, tol=1e-10)

    # NOTE: despite the message, this step compares the MOJO and POJO frames.
    print("Comparing pojo predict and h2o predict...")
    pyunit_utils.compare_frames_local_onecolumn_NA(
        pred_mojo, pred_pojo, prob=1, tol=1e-10)