# Overlay the JSON-encoded `params` string onto the keyword arguments for H2O.
kwargs.update(json.loads(params))
# Model id layout: DSS.H2O_connector.model.<output folder full name>.<algorithm>
kwargs['model_id'] = 'DSS.H2O_connector.model.' + output_folder.full_name + '.' + algorithm

# Dispatch table: algorithm name -> H2O training function.
algorithms = {
    'autoencoder': h2o.h2o.autoencoder,
    'deeplearning': h2o.h2o.deeplearning,
    'gbm': h2o.h2o.gbm,
    'glm': h2o.h2o.glm,
    'glrm': h2o.h2o.glrm,
    'kmeans': h2o.h2o.kmeans,
    'naive_bayes': h2o.h2o.naive_bayes,
    'prcomp': h2o.h2o.prcomp,
    'random_forest': h2o.h2o.random_forest,
    'svd': h2o.h2o.svd,
}
# print 'Arguments passed to H2O: ', kwargs # This makes the job fail with exception None ??
model = algorithms[algorithm](**kwargs)

## save model summary in output_folder and model to disk
with open(os.path.join(output_folder_path, 'model_summary.txt'), 'w') as summary_file:
    orig_stdout = sys.stdout
    sys.stdout = summary_file
    try:
        model.show()  # this method uses print to write to stdout
    finally:
        # Always put the real stdout back, even if model.show() raises;
        # otherwise later prints would target the summary file, which the
        # enclosing `with` closes on exit.
        sys.stdout = orig_stdout
h2o.h2o.save_model(
    model,
    saved_model_folder(model_config, output_folder),
    force=True)  # "force" means overwrite
## load model def find_model_id(folder): with open(os.path.join(folder.get_path(), 'model_summary.txt')) as file: for line in file: match = re.match('Model Key: (DSS\.H2O_connector\.model\.'+folder.full_name+'\..*)$', line) if match: return match.group(1) raise Exception('Could not find model id in model_summary.txt') model_id = find_model_id(input_folder) try: model = h2o.h2o.get_model(model_id) except EnvironmentError as e: print 'Model key unknown to H2O:\n“',e,'”\n','Will thus load from saved model.' path = os.path.join(saved_model_folder(model_config, input_folder), model_id) model = h2o.h2o.load_model(path=path) ## compute predictions and add input columns to output predict_frame = model.predict(input_frame) print "predict_frame id:", predict_frame.frame_id print "predict_frame types:", predict_frame.types sys.stdout.flush() for col in get_recipe_config()["columns_to_copy_to_output"]: predict_frame[col] = input_frame[col] ## save output def DSS_type(col_name): return { 'enum':'string',
# Fragment of the scoring script; it begins inside a loop over `file` whose
# enclosing statements are not visible here, and ends on the header of
# DSS_type, whose body is not visible either.
#  1. Each line is matched against
#     'Model Key: (DSS.H2O_connector.model.<folder.full_name>.<rest>)'; the
#     first match returns capture group 1, and an Exception is raised once
#     the lines are exhausted without a match.
#  2. The model is requested with h2o.h2o.get_model(model_id); on
#     EnvironmentError a message is printed and the model is loaded with
#     h2o.h2o.load_model from saved_model_folder(model_config, input_folder)
#     joined with model_id.
#  3. model.predict(input_frame) is computed, its frame id and types are
#     printed, stdout is flushed, and every column listed under
#     "columns_to_copy_to_output" in the recipe config is copied from
#     input_frame into the prediction frame.
# NOTE(review): folder.full_name is concatenated into the regex unescaped, so
# regex metacharacters in it act as pattern syntax -- consider re.escape().
for line in file: match = re.match( 'Model Key: (DSS\.H2O_connector\.model\.' + folder.full_name + '\..*)$', line) if match: return match.group(1) raise Exception('Could not find model id in model_summary.txt') model_id = find_model_id(input_folder) try: model = h2o.h2o.get_model(model_id) except EnvironmentError as e: print 'Model key unknown to H2O:\n“', e, '”\n', 'Will thus load from saved model.' path = os.path.join(saved_model_folder(model_config, input_folder), model_id) model = h2o.h2o.load_model(path=path) ## compute predictions and add input columns to output predict_frame = model.predict(input_frame) print "predict_frame id:", predict_frame.frame_id print "predict_frame types:", predict_frame.types sys.stdout.flush() for col in get_recipe_config()["columns_to_copy_to_output"]: predict_frame[col] = input_frame[col] ## save output def DSS_type(col_name):
# Model id layout: DSS.H2O_connector.model.<output folder full name>.<algorithm>
kwargs['model_id'] = 'DSS.H2O_connector.model.' + output_folder.full_name + '.' + algorithm

# Dispatch table: algorithm name -> H2O training function.
algorithms = {
    'autoencoder': h2o.h2o.autoencoder,
    'deeplearning': h2o.h2o.deeplearning,
    'gbm': h2o.h2o.gbm,
    'glm': h2o.h2o.glm,
    'glrm': h2o.h2o.glrm,
    'kmeans': h2o.h2o.kmeans,
    'naive_bayes': h2o.h2o.naive_bayes,
    'prcomp': h2o.h2o.prcomp,
    'random_forest': h2o.h2o.random_forest,
    'svd': h2o.h2o.svd,
}
# print 'Arguments passed to H2O: ', kwargs # This makes the job fail with exception None ??
model = algorithms[algorithm](**kwargs)

## save model summary in output_folder and model to disk
with open(os.path.join(output_folder_path, 'model_summary.txt'), 'w') as summary_file:
    orig_stdout = sys.stdout
    sys.stdout = summary_file
    try:
        model.show()  # this method uses print to write to stdout
    finally:
        # Always put the real stdout back, even if model.show() raises;
        # otherwise later prints would target the summary file, which the
        # enclosing `with` closes on exit.
        sys.stdout = orig_stdout
h2o.h2o.save_model(
    model,
    saved_model_folder(model_config, output_folder),
    force=True)  # "force" means overwrite