def init():
    """Load the scoring model and its transform JSON into module globals.

    Loaders are tried in order, each one being the fallback of the previous:

    1. Spark ``PipelineModel`` loaded from ``local_model_path``.
    2. H2O MOJO model created from ``local_model_path + '/mojo_model'``.
    3. ``load_xgb_model`` for an ``XGBoostClassificationModel`` together
       with its companion ``PipelineModel``.

    Globals written: ``model``, ``pipeline_model`` (XGBoost path only),
    ``final_transform_json_path`` and ``model_json``.
    """
    global model, pipeline_model, final_transform_json_path, model_json

    # Load the model.
    try:
        print("尝试加载PipelineModel")
        model = PipelineModel.load(local_model_path)
    except Exception as pipeline_error:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C / SystemExit
        # still propagate; the reason is logged instead of being discarded.
        logging.warning("PipelineModel load failed: %s", pipeline_error)
        try:
            # H2O models must go through this branch.
            from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel
            print("从加载PipelineModel的try中跳出")
            print("在except的try中尝试加载H2OMOJOModel")
            settings = H2OMOJOSettings(withDetailedPredictionCol=True)
            model = H2OMOJOModel.createFromMojo(
                local_model_path + '/mojo_model', settings)
        except Exception as mojo_error:
            logging.warning("H2OMOJOModel load failed: %s", mojo_error)
            print("从加载H2OMOJOModel的try中跳出")
            print("尝试加载XGBModel")
            model = load_xgb_model(
                local_model_path, m_type='XGBoostClassificationModel')
            if not model:
                logging.error('XGBoostClassificationModel没有加载成功')
            pipeline_model = load_xgb_model(local_model_path, "PipelineModel")
            if not pipeline_model:
                logging.error('XGB需要的pipelinemodel没有加载成功')
                # Also log the falsy value the loader returned.
                logging.error(pipeline_model)

    final_transform_json_path = get_jsonfile_fullname()

    # Read the JSON stored alongside the model into ``model_json``.
    with open(final_transform_json_path, encoding='utf-8') as f:
        model_json = json.load(f)
def func():
    """Load a model through the loader chain and return it.

    Loaders are tried in order: Spark ``PipelineModel``, then H2O MOJO
    (``local_model_path + '/mojo_model'``), then ``load_xgb_model`` for an
    ``XGBoostClassificationModel`` plus its companion ``PipelineModel``.

    The loaded objects are also stored in the module globals ``model`` and
    ``pipeline_model``.

    :return: ``(model, pipeline_model)``. ``pipeline_model`` is only assigned
        on the XGBoost path; on the other paths the current value of the
        module global is returned, or ``None`` if it was never set.
    """
    global model, pipeline_model

    try:
        print("尝试加载PipelineModel")
        model = PipelineModel.load(local_model_path)
        print("加载pipeline模型成功")
    except Exception as pipeline_error:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C / SystemExit
        # still propagate; the reason is logged instead of being discarded.
        logging.warning("PipelineModel load failed: %s", pipeline_error)
        try:
            # H2O models must go through this branch.
            from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel
            print("从加载PipelineModel的try中跳出")
            print("在except的try中尝试加载H2OMOJOModel")
            settings = H2OMOJOSettings(withDetailedPredictionCol=True)
            model = H2OMOJOModel.createFromMojo(
                local_model_path + '/mojo_model', settings)
        except Exception as mojo_error:
            logging.warning("H2OMOJOModel load failed: %s", mojo_error)
            print("从加载H2OMOJOModel的try中跳出")
            print("尝试加载XGBModel")
            model = load_xgb_model(
                local_model_path, m_type='XGBoostClassificationModel')
            if not model:
                logging.error('XGBoostClassificationModel没有加载成功')
            pipeline_model = load_xgb_model(local_model_path, "PipelineModel")
            if not pipeline_model:
                logging.error('XGB需要的pipelinemodel没有加载成功')
                # Also log the falsy value the loader returned.
                logging.error(pipeline_model)

    # ``pipeline_model`` is unassigned when a non-XGBoost loader succeeded and
    # nothing else set the global; look it up safely instead of raising
    # NameError on the return statement.
    return model, globals().get("pipeline_model")
def test_h2o_mojo_pipeline_predictions(self):
    """Check MOJO pipeline predictions on the prostate data set.

    The first five predictions are verified twice: once through the
    ``prediction.preds`` column and once through the prediction UDF for
    the ``AGE`` column.
    """
    # Try loading the Mojo and predicting on it without starting H2O Context.
    mojo_location = "file://" + os.path.abspath(
        "../ml/src/test/resources/mojo2data/pipeline.mojo")
    pipeline = H2OMOJOPipelineModel.createFromMojo(
        mojo_location, H2OMOJOSettings(namedMojoOutputColumns=False))

    prostate_csv = "file://" + unit_test_utils.locate(
        "smalldata/prostate/prostate.csv")
    prostate_df = self._spark.read.csv(prostate_csv, header=True)
    scored = pipeline.transform(prostate_df).repartition(1)

    expected_values = (
        65.36320409515132,
        64.96902128114817,
        64.96721023747583,
        65.78772654671035,
        66.11327967814829,
    )

    # Indexing (rather than zip) keeps the IndexError if fewer rows return.
    struct_rows = scored.select("prediction.preds").take(5)
    for position, expected in enumerate(expected_values):
        assert struct_rows[position][0][0] == expected

    udf_rows = scored.select(pipeline.selectPredictionUDF("AGE")).take(5)
    for position, expected in enumerate(expected_values):
        assert udf_rows[position][0] == expected
def h2o_model_load(self, path):
    """
    Load an H2O MOJO model.

    :param path: base location; combined with ``self.model_key`` through
        ``self.concat_path`` to obtain the model directory
    :return: the ``H2OMOJOModel`` created from ``<model directory>/mojo_model``
        with ``withDetailedPredictionCol`` disabled
    """
    model_dir = self.concat_path(path, self.model_key)

    # Imported here, as in the original, so pysparkling is only required
    # when this loader is actually called.
    from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel

    mojo_settings = H2OMOJOSettings(withDetailedPredictionCol=False)
    return H2OMOJOModel.createFromMojo(model_dir + "/mojo_model", mojo_settings)
def testMojoPredictionsUnseenCategoricals(spark):
    """Score one row whose ``class`` value is ``"Missing_categorical"``.

    The MOJO is loaded with ``convertUnknownCategoricalLevelsToNa=True``;
    the test checks that the input columns come back unchanged and that the
    prediction matches the expected value.
    """
    mojo_location = "file://" + os.path.abspath(
        "../ml/src/test/resources/deep_learning_airlines_categoricals.zip")
    mojo_model = H2OMOJOModel.createFromMojo(
        mojo_location,
        H2OMOJOSettings(convertUnknownCategoricalLevelsToNa=True))

    make_row = Row("sepal_len", "sepal_wid", "petal_len", "petal_wid", "class")
    raw_records = spark.sparkContext.parallelize([
        (5.1, 3.5, 1.4, 0.2, "Missing_categorical")
    ])
    input_df = spark.createDataFrame(
        raw_records.map(lambda values: make_row(*values)))

    scored_row = mojo_model.transform(input_df).collect()[0]

    # Checked in the same order as the original assertions.
    expected_fields = (
        ("class", "Missing_categorical"),
        ("petal_len", 1.4),
        ("petal_wid", 0.2),
        ("sepal_len", 5.1),
        ("sepal_wid", 3.5),
        ("prediction", 5.240174068202646),
    )
    for column, expected in expected_fields:
        assert scored_row[column] == expected
def init():
    """Download the model archive and load whichever model type it contains.

    A PMML model is tried first; if that fails the Spark / H2O / XGBoost
    loader chain is used. ``model_tag`` records the branch that was taken:

    * ``1`` - PMML model (``pmmlModel`` / ``pmmlFields`` are set)
    * ``2`` - Spark ``PipelineModel``
    * ``3`` - H2O MOJO model
    * ``4`` - XGBoost fallback (set even if ``load_xgb_model`` returned a
      falsy value; the failure is only logged)

    Globals written: ``model_tag``, ``pmmlFields``, ``pmmlModel``, ``model``,
    ``pipeline_model``, ``final_transform_json_path`` and ``model_json``.
    """
    global model_tag, pmmlFields, pmmlModel
    global model, pipeline_model
    global final_transform_json_path, model_json

    # Download the model.
    download_model.download_model(download_model_zip_path, unzip_path)

    try:
        # If the model directory contains a .pmml file, load it directly
        # (archive layout: model/xxx.pmml). Otherwise the PMML model may be
        # stored as model/model/part-00000.
        full_path = None
        model_path_childs = os.listdir(local_model_path)
        logging.info(f'模型文件夹下的文件有:{model_path_childs}')
        for child in model_path_childs:
            if child.endswith(".pmml"):
                full_path = os.path.join(local_model_path, child)
                break
            elif child == "model":
                for file in os.listdir(os.path.join(local_model_path, "model")):
                    if file.startswith("part"):
                        full_path = local_model_path + "/model/" + file
                        break
        if full_path is None:
            # Explicit signal instead of relying on a NameError from an
            # unbound variable to reach the fallback below.
            raise FileNotFoundError(
                "no PMML file found under %s" % local_model_path)

        logging.info(f'获取到的模型路径是:{full_path}')
        print("模型大小是:", os.path.getsize(full_path))
        pmmlModel = loadPmml.fromFile(full_path)
        pmmlFields = parse_xml(full_path)
        logging.info('成功加载pmml模型')
        model_tag = 1
        # NOTE(review): the original indentation was lost; the non-PMML
        # loaders and the transform-JSON load below are assumed to run only
        # when PMML loading fails - confirm PMML deployments do not need
        # ``model_json``.
        return
    except Exception as pmml_error:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C / SystemExit
        # still propagate.
        logging.info("从pmml模型的加载处理中跳出")
        logging.info("PMML load skipped: %s", pmml_error)

    # Resolve the model path.
    get_model_path(local_model_path)

    # Load the model.
    try:
        logging.info("尝试加载PipelineModel")
        model = PipelineModel.load(local_model_path)
        model_tag = 2
    except Exception as pipeline_error:
        logging.warning("PipelineModel load failed: %s", pipeline_error)
        try:
            # H2O models must go through this branch.
            from pysparkling.ml import H2OMOJOSettings, H2OMOJOModel
            logging.info("从加载PipelineModel的try中跳出")
            print("在except的try中尝试加载H2OMOJOModel")
            settings = H2OMOJOSettings(withDetailedPredictionCol=True)
            model = H2OMOJOModel.createFromMojo(
                local_model_path + '/mojo_model', settings)
            model_tag = 3
        except Exception as mojo_error:
            logging.warning("H2OMOJOModel load failed: %s", mojo_error)
            print("从加载H2OMOJOModel的try中跳出")
            print("尝试加载XGBModel")
            model = load_xgb_model(
                local_model_path, m_type='XGBoostClassificationModel')
            if not model:
                logging.error('XGBoostClassificationModel没有加载成功')
            pipeline_model = load_xgb_model(local_model_path, "PipelineModel")
            if not pipeline_model:
                logging.error('XGB需要的pipelinemodel没有加载成功')
                # Also log the falsy value the loader returned.
                logging.error(pipeline_model)
            model_tag = 4

    final_transform_json_path = get_jsonfile_fullname()

    # Read the JSON stored alongside the model into ``model_json``.
    with open(final_transform_json_path, encoding='utf-8') as f:
        model_json = json.load(f)