def __init__(self, pmml):
    """Parse a PMML file and reconstruct the model it describes.

    Parameters
    ----------
    pmml :
        PMML file path (or file object) accepted by ``ny.parse``; parsed
        with silence=True.
    """
    self.nyoka_pmml = ny.parse(pmml, True)
    self.image_input = None
    self.layer_input = None
    # NOTE(review): self.model is assigned twice (None here, then the
    # built model below). The None looks redundant unless _build_model
    # reads self.model — confirm before removing.
    self.model = None
    # Cache of per-layer outputs, populated elsewhere.
    self.layers_outputs = {}
    self.model = self._build_model()
def test_lgbm_02(self):
    """Export an LGBMRegressor pipeline to PMML and verify that every
    split value and leaf score in the PMML trees matches the booster dump.
    """
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    feature_names = [
        name for name in auto.columns if name not in ('mpg', 'car name')
    ]
    target_name = 'mpg'
    f_name = "lgbmr_pmml.pmml"
    model = LGBMRegressor()
    pipeline_obj = Pipeline([('lgbmr', model)])
    pipeline_obj.fit(auto[feature_names], auto[target_name])
    lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)
    pmml_obj = pml.parse(f_name, True)
    # Collect split values / leaf scores from the exported PMML trees.
    pmml_value_list = []
    model_value_list = []
    pmml_score_list = []
    model_score_list = []
    seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
    for seg in seg_tab:
        for node in seg.TreeModel.Node.Node:
            varlen = node.get_Node().__len__()
            if varlen > 0:
                # Interior node: record its predicate, then recurse via
                # the test helper to gather the subtree's values/scores.
                pmml_value_list.append(node.SimplePredicate.value)
                self.extractValues(node, pmml_value_list, pmml_score_list)
            else:
                # Leaf node: record both predicate value and score.
                pmml_value_list.append(node.SimplePredicate.value)
                pmml_score_list.append(node.score)
    # Collect the same information straight from the LightGBM booster.
    main_key_value = []
    lgb_dump = model.booster_.dump_model()
    for i in range(len(lgb_dump['tree_info'])):
        tree = lgb_dump['tree_info'][i]['tree_structure']
        main_key_value.append(tree)
    for i in range(len(main_key_value)):
        list_score_temp = []
        list_val_temp = []
        node_list = main_key_value[i]
        self.create_node(node_list, list_score_temp, list_val_temp)
        model_score_list = model_score_list + list_score_temp
        model_value_list = model_value_list + list_val_temp
        list_val_temp.clear()
        list_score_temp.clear()
    ##1 Leaf scores must match (PMML stores them as strings).
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2 Split threshold values must match.
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3 The PMML file must exist on disk.
    self.assertEqual(os.path.isfile(f_name), True)
def test_sklearn_01(self):
    """Export an SVC pipeline to PMML and verify intercepts and gamma."""
    iris = datasets.load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['Species'] = iris.target
    feature_cols = frame.columns.drop('Species')
    label = 'Species'
    out_file = "svc_pmml.pmml"
    classifier = SVC()
    pipe = Pipeline([('svm', classifier)])
    pipe.fit(frame[feature_cols], frame[label])
    skl_to_pmml(pipe, feature_cols, label, out_file)
    recon = pml.parse(out_file, True)
    ## 1: each model intercept matches the absoluteValue of the
    ## corresponding SupportVectorMachine's Coefficients element.
    machines = recon.SupportVectorMachineModel[0].SupportVectorMachine
    for expected, machine in zip(classifier.intercept_, machines):
        self.assertEqual("{:.16f}".format(expected),
                         "{:.16f}".format(machine.Coefficients.absoluteValue))
    ## 2: the RBF kernel gamma round-trips.
    svm_model = recon.SupportVectorMachineModel[0]
    self.assertEqual(svm_model.RadialBasisKernelType.gamma, classifier._gamma)
def test_sklearn_03(self):
    """Round-trip a RandomForest pipeline and verify the ensemble layout."""
    iris = datasets.load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['Species'] = iris.target
    feature_cols = frame.columns.drop('Species')
    label = 'Species'
    out_file = "rf_pmml.pmml"
    forest = RandomForestClassifier(n_estimators=100)
    mapper = DataFrameMapper([
        (['sepal length (cm)', 'sepal width (cm)'], StandardScaler()),
        (['petal length (cm)', 'petal width (cm)'], Imputer()),
    ])
    pipe = Pipeline([("mapping", mapper), ("rfc", forest)])
    pipe.fit(frame[feature_cols], frame[label])
    skl_to_pmml(pipe, feature_cols, label, out_file)
    recon = pml.parse(out_file, True)
    segmentation = recon.MiningModel[0].Segmentation
    ## 1: one tree segment per estimator.
    self.assertEqual(forest.n_estimators, len(segmentation.Segment))
    ##2: random forests combine by majority vote.
    self.assertEqual(segmentation.multipleModelMethod, "majorityVote")
def test_sklearn_02(self):
    """Export a scaled KNN classifier and verify the NearestNeighborModel."""
    iris = datasets.load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['Species'] = iris.target
    feature_cols = frame.columns.drop('Species')
    label = 'Species'
    out_file = "knn_pmml.pmml"
    pipe = Pipeline([
        ('scaling', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=5)),
    ])
    pipe.fit(frame[feature_cols], frame[label])
    skl_to_pmml(pipe, feature_cols, label, out_file)
    recon = pml.parse(out_file, True)
    knn_model = recon.NearestNeighborModel[0]
    ##1: a euclidean ComparisonMeasure must be present.
    self.assertIsNotNone(knn_model.ComparisonMeasure.euclidean)
    ##2: the measure kind is "distance".
    self.assertEqual(knn_model.ComparisonMeasure.kind, "distance")
    ##3: k round-trips into numberOfNeighbors.
    self.assertEqual(pipe.steps[-1][-1].n_neighbors,
                     knn_model.numberOfNeighbors)
def test_sklearn_04(self):
    """Export a GradientBoostingClassifier pipeline and check the PMML
    MiningModel structure (a two-segment modelChain ending in logit).
    """
    titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
    target = 'Survived'
    # BUG FIX: the original used every column — including the target
    # 'Survived' — as a feature, leaking the label into training.
    features = titanic.columns.drop(target)
    f_name = "gb_pmml.pmml"
    pipeline_obj = Pipeline([
        ("imp", Imputer(strategy="median")),
        ("gbc", GradientBoostingClassifier(n_estimators=10))
    ])
    pipeline_obj.fit(titanic[features], titanic[target])
    skl_to_pmml(pipeline_obj, features, target, f_name)
    pmml_obj = pml.parse(f_name, True)
    ##1 GBC is encoded as a model chain.
    self.assertEqual(pmml_obj.MiningModel[0].Segmentation.multipleModelMethod,
                     "modelChain")
    ##2 Two segments: the tree ensemble plus the aggregating regression.
    self.assertEqual(len(pmml_obj.MiningModel[0].Segmentation.Segment), 2)
    ##3 The final segment normalizes with logit.
    self.assertEqual(
        pmml_obj.MiningModel[0].Segmentation.Segment[1].RegressionModel.normalizationMethod,
        "logit")
def test_sklearn_06(self):
    """Export a LinearRegression pipeline and verify intercept/coefficients."""
    df = pd.read_csv('nyoka/tests/auto-mpg.csv')
    X = df.drop(['mpg', 'car name'], axis=1)
    y = df['mpg']
    feature_cols = X.columns
    label = 'mpg'
    out_file = "linearregression_pmml.pmml"
    regressor = LinearRegression()
    pipe = Pipeline([('model', regressor)])
    pipe.fit(X, y)
    skl_to_pmml(pipe, feature_cols, label, out_file)
    recon = pml.parse(out_file, True)
    table = recon.RegressionModel[0].RegressionTable[0]
    ## 1: intercept round-trips exactly.
    self.assertEqual(table.intercept, regressor.intercept_)
    ## 2: every coefficient matches to 16 decimal places.
    for expected, predictor in zip(regressor.coef_, table.NumericPredictor):
        self.assertEqual("{:.16f}".format(expected),
                         "{:.16f}".format(predictor.coefficient))
def test_plain_text_script(self):
    """Embed a preprocessing script as plain text in a Keras PMML and
    verify it round-trips unchanged, along with the layer count.
    """
    base = applications.MobileNet(weights='imagenet', include_top=False,
                                  input_shape=(224, 224, 3))
    x = base.output
    x = Flatten()(x)
    x = Dense(1024, activation="relu")(x)
    predictions = Dense(2, activation='sigmoid')(x)
    model_final = Model(inputs=base.input, outputs=predictions,
                        name='predictions')
    # FIX: close file handles deterministically instead of leaking them.
    with open("nyoka/tests/preprocess.py", 'r') as fh:
        script_content = fh.read()
    pmml_obj = KerasToPmml(model_final,
                           dataSet='image',
                           predictedClasses=['cat', 'dog'],
                           script_args={
                               "content": script_content,
                               "def_name": "getBase64EncodedString",
                               "return_type": "string",
                               "encode": False
                           })
    with open("script_with_keras_plain.pmml", 'w') as fh:
        pmml_obj.export(fh, 0)
    self.assertEqual(os.path.isfile("script_with_keras_plain.pmml"), True)
    reconPmmlObj = ny.parse("script_with_keras_plain.pmml", True)
    # The script is stored line-by-line in the Extension; strip the
    # indentation tabs the writer added, then rejoin.
    content = reconPmmlObj.TransformationDictionary.DefineFunction[0].Apply.Extension[0].anytypeobjs_
    content[0] = content[0].replace("\t", "")
    content = "\n".join(content)
    self.assertEqual(script_content, content)
    self.assertEqual(len(model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_keras_02(self):
    """Train a small regression MLP on Boston housing, export it, and
    verify the exported network's layer count after a parse round-trip.
    """
    boston = load_boston()
    data = pd.DataFrame(boston.data)
    features = list(boston.feature_names)
    target = 'PRICE'
    data.columns = features
    data['PRICE'] = boston.target
    x_train, x_test, y_train, y_test = train_test_split(
        data[features], data[target], test_size=0.20, random_state=42)
    model = Sequential()
    model.add(Dense(13, input_dim=13, kernel_initializer='normal',
                    activation='relu'))
    model.add(Dense(23))
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(x_train, y_train, epochs=1000, verbose=0)
    pmmlObj = KerasToPmml(model)
    # FIX: close the output handle deterministically.
    with open('sequentialModel.pmml', 'w') as fh:
        pmmlObj.export(fh, 0)
    reconPmmlObj = ny.parse('sequentialModel.pmml', True)
    self.assertEqual(os.path.isfile("sequentialModel.pmml"), True)
    # The PMML has one extra NetworkLayer (presumably the input layer —
    # confirm against KerasToPmml), hence the -1.
    self.assertEqual(len(model.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer) - 1)
def test_xgboost_02(self):
    """Export an XGBRegressor pipeline to PMML and verify that every
    split value and leaf score in the PMML trees matches the booster dump.
    """
    auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
    feature_names = [
        name for name in auto.columns if name not in ('mpg', 'car name')
    ]
    target_name = 'mpg'
    f_name = "xgbr_pmml.pmml"
    model = XGBRegressor()
    pipeline_obj = Pipeline([('xgbr', model)])
    pipeline_obj.fit(auto[feature_names], auto[target_name])
    xgboost_to_pmml(pipeline_obj, feature_names, target_name, f_name,
                    description="A test model")
    pmml_obj = pml.parse(f_name, True)
    # Collect split values / leaf scores from the exported PMML trees.
    pmml_value_list = []
    model_value_list = []
    pmml_score_list = []
    model_score_list = []
    seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
    for seg in seg_tab:
        for node in seg.TreeModel.Node.Node:
            varlen = node.get_Node().__len__()
            if varlen > 0:
                # Interior node: record predicate, then recurse via the
                # test helper to gather the subtree's values/scores.
                pmml_value_list.append(node.SimplePredicate.value)
                self.extractValues(node, pmml_value_list, pmml_score_list)
            else:
                # Leaf node: record both predicate value and score.
                pmml_value_list.append(node.SimplePredicate.value)
                pmml_score_list.append(node.score)
    # Collect the same information straight from the XGBoost booster,
    # one JSON tree dump per estimator.
    get_nodes_in_json_format = []
    for i in range(model.n_estimators):
        get_nodes_in_json_format.append(
            json.loads(model._Booster.get_dump(dump_format='json')[i]))
    for i in range(len(get_nodes_in_json_format)):
        list_score_temp = []
        list_val_temp = []
        node_list = get_nodes_in_json_format[i]
        self.create_node(node_list, list_score_temp, list_val_temp)
        model_score_list = model_score_list + list_score_temp
        model_value_list = model_value_list + list_val_temp
        list_val_temp.clear()
        list_score_temp.clear()
    ##1 Leaf scores must match (PMML stores them as strings).
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2 Split threshold values must match.
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3 The PMML file must exist on disk.
    self.assertEqual(os.path.isfile(f_name), True)
def test_keras_01(self):
    """Export the fixture model to PMML and verify file creation and
    layer count after a parse round-trip.
    """
    cnn_pmml = KerasToPmml(self.model_final, model_name="MobileNet",
                           description="Demo", copyright="Internal User",
                           dataSet='image', predictedClasses=['cats', 'dogs'])
    # FIX: close the output handle deterministically instead of leaking it.
    with open('2classMBNet.pmml', "w") as fh:
        cnn_pmml.export(fh, 0)
    reconPmmlObj = ny.parse('2classMBNet.pmml', True)
    self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
    self.assertEqual(len(self.model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def get_output(self):
    """Build the Output element for a RetinaNet PMML.

    Returns
    -------
    Nyoka's Output object containing a single OutputField that carries
    the predicted label/box/score as a JSON-formatted string.
    """
    json_ext = pml.Extension(extender="ADAPA", name="format", value="JSON")
    field = pml.OutputField(name="predicted_LabelBoxScore",
                            dataType="string",
                            feature="predictedValue",
                            Extension=[json_ext])
    return pml.Output(OutputField=[field])
def get_training_parameter(self):
    """Build the TrainingParameters element for a RetinaNet PMML.

    Returns
    -------
    Nyoka's TrainingParameters object with architectureName 'retinanet'.
    """
    return pml.TrainingParameters(architectureName='retinanet')
def _nyoka_rule_set_model(
        self,
        rule_set_model: models.RuleSetModel) -> nyoka_pmml.RuleSetModel:
    """Translate an internal RuleSetModel into its Nyoka counterpart.

    Optional sub-elements (mining schema, rule set) are converted only
    when present; otherwise None is passed through.
    """
    mining_schema = None
    if rule_set_model.miningSchema is not None:
        mining_schema = self._nyoka_mining_schema(rule_set_model.miningSchema)
    rule_set = None
    if rule_set_model.ruleSet is not None:
        rule_set = self._nyoka_rule_set(rule_set_model.ruleSet)
    return nyoka_pmml.RuleSetModel(functionName='classification',
                                   algorithmName='RuleSet',
                                   MiningSchema=mining_schema,
                                   RuleSet=rule_set)
def get_local_transformation(self):
    """Build the LocalTransformations element for a RetinaNet PMML.

    Returns
    -------
    Nyoka's LocalTransformations object with one DerivedField that
    base64-encodes the buffered input image.
    """
    # mobilenet/densenet backbones use 'tf' scaling; everything else 'caffe'.
    scaling_mode = ('tf' if self.backbone_name in ['mobilenet', 'densenet']
                    else 'caffe')
    apply_fn = pml.Apply(
        function='KerasRetinaNet:getBase64StringFromBufferedInput',
        FieldRef=[pml.FieldRef(field=self.input_format)],
        Constant=[pml.Constant(valueOf_=scaling_mode)],
    )
    derived = pml.DerivedField(name="base64String",
                               optype="categorical",
                               dataType="string",
                               Apply=apply_fn)
    return pml.LocalTransformations(DerivedField=[derived])
def _nyoka_pmml_model(
    self, simple_pmml_ruleset_model: models.SimplePMMLRuleSetModel
) -> nyoka_pmml.PMML:
    """Translate a SimplePMMLRuleSetModel into a full Nyoka PMML document.

    Uses the serializer's injected timestamp when set (for reproducible
    output), otherwise the current time.
    """
    if self._timestamp is None:
        timestamp = datetime.datetime.now()
    else:
        timestamp = self._timestamp
    header = nyoka_pmml.Header(
        copyright=NyokaSerializer.COPYRIGHT_STRING,
        description=nyoka_constants.HEADER_INFO.DEFAULT_DESCRIPTION,
        Timestamp=nyoka_pmml.Timestamp(timestamp),
        Application=nyoka_pmml.Application(
            name=NyokaSerializer.APPLICATION_NAME,
            version=version.version))
    data_dictionary = None
    if simple_pmml_ruleset_model.dataDictionary is not None:
        data_dictionary = self._nyoka_data_dictionary(
            simple_pmml_ruleset_model.dataDictionary)
    rule_set_models = None
    if simple_pmml_ruleset_model.ruleSetModel is not None:
        rule_set_models = [
            self._nyoka_rule_set_model(simple_pmml_ruleset_model.ruleSetModel)
        ]
    return nyoka_pmml.PMML(version=nyoka_constants.PMML_SCHEMA.VERSION,
                           Header=header,
                           DataDictionary=data_dictionary,
                           RuleSetModel=rule_set_models)
def test_keras_01(self):
    """Build a 2-class MobileNet head, export it to PMML, and verify the
    layer count survives a parse round-trip.
    """
    base = applications.MobileNet(weights='imagenet', include_top=False,
                                  input_shape=(224, 224, 3))
    activType = 'sigmoid'
    x = base.output
    x = Flatten()(x)
    x = Dense(1024, activation="relu")(x)
    predictions = Dense(2, activation=activType)(x)
    model_final = Model(inputs=base.input, outputs=predictions,
                        name='predictions')
    cnn_pmml = KerasToPmml(model_final, model_name="MobileNet",
                           description="Demo", copyright="Internal User",
                           dataSet='image', predictedClasses=['cats', 'dogs'])
    # FIX: close the output handle deterministically instead of leaking it.
    with open('2classMBNet.pmml', "w") as fh:
        cnn_pmml.export(fh, 0)
    reconPmmlObj = ny.parse('2classMBNet.pmml', True)
    self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
    self.assertEqual(len(model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_construction_vgg(self):
    """Export a VGG16-based classifier to PMML and verify file creation
    and layer count after a parse round-trip.
    """
    base = applications.VGG16(weights="imagenet", include_top=False,
                              input_shape=(224, 224, 3))
    x = base.output
    x = layers.Flatten()(x)
    x = layers.Dense(1024, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(1024, activation="relu")(x)
    predictions = layers.Dense(2, activation="softmax")(x)
    # NOTE(review): `input=`/`output=` are the legacy Keras 1.x keyword
    # names (modern Keras uses inputs=/outputs=). Kept as-is for the
    # Keras version this suite targets — confirm before upgrading.
    model_final = models.Model(input=base.input, output=predictions)
    model_final.compile(loss="binary_crossentropy",
                        optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                        metrics=["accuracy"])
    pmmlObj = KerasToPmml(model_final, model_name="VGG16", dataSet='image')
    # FIX: close the output handle deterministically instead of leaking it.
    with open('vgg.pmml', 'w') as fh:
        pmmlObj.export(fh, 0)
    reconPmmlObj = ny.parse('vgg.pmml', True)
    self.assertEqual(os.path.isfile("vgg.pmml"), True)
    self.assertEqual(len(model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def test_02(self):
    """RetinaNet with a mobilenet backbone exports buffered-input data
    and 'tf' preprocessing into the PMML."""
    backbone = 'mobilenet'
    RetinanetToPmml(self.model,
                    input_shape=(224, 224, 3),
                    backbone_name=backbone,
                    pmml_file_name="retinanet_with_coco_2.pmml")
    recon = pml.parse("retinanet_with_coco_2.pmml", True)
    buffered_flag = recon.DataDictionary.DataField[0].Extension[0].value
    self.assertEqual(buffered_flag, 'true')
    derived = recon.DeepNetwork[0].LocalTransformations.DerivedField[0]
    self.assertEqual(derived.Apply.function,
                     'KerasRetinaNet:getBase64StringFromBufferedInput')
    self.assertEqual(derived.Apply.Constant[0].valueOf_, 'tf')
def test_05(self):
    """A base64-encoded preprocessing script embedded in the PMML decodes
    back to the original source.
    """
    backbone = 'resnet'
    # FIX: close the script file deterministically instead of leaking it.
    with open("nyoka/tests/preprocess.py", 'r') as fh:
        script_content = fh.read()
    RetinanetToPmml(self.model,
                    input_shape=(224, 224, 3),
                    backbone_name=backbone,
                    pmml_file_name="retinanet_with_coco_2.pmml",
                    script_args={
                        "content": script_content,
                        "def_name": "getBase64EncodedString",
                        "return_type": "string",
                        "encode": True
                    })
    recon_pmml_obj = pml.parse("retinanet_with_coco_2.pmml", True)
    encoded = recon_pmml_obj.TransformationDictionary.DefineFunction[
        0].Apply.Extension[0].anytypeobjs_[0]
    content = base64.b64decode(encoded).decode()
    self.assertEqual(script_content, content)
def test_sklearn_07(self):
    """Export a LogisticRegression pipeline and verify the modelChain
    layout, per-class logit segments, and intercept round-trip."""
    iris = datasets.load_iris()
    frame = pd.DataFrame(iris.data, columns=iris.feature_names)
    frame['Species'] = iris.target
    feature_cols = frame.columns.drop('Species')
    label = 'Species'
    out_file = "logisticregression_pmml.pmml"
    clf = LogisticRegression()
    mapper = DataFrameMapper([
        (['sepal length (cm)', 'sepal width (cm)'], StandardScaler()),
        (['petal length (cm)', 'petal width (cm)'], Imputer()),
    ])
    pipe = Pipeline([("mapping", mapper), ("lr", clf)])
    pipe.fit(frame[feature_cols], frame[label])
    skl_to_pmml(pipe, feature_cols, label, out_file)
    recon = pml.parse(out_file, True)
    segmentation = recon.MiningModel[0].Segmentation
    n_classes = len(clf.classes_)
    ## 1: one segment per class plus the aggregating regression segment.
    self.assertEqual(len(segmentation.Segment), n_classes + 1)
    ## 2: multi-class LR is encoded as a model chain.
    self.assertEqual(segmentation.multipleModelMethod, "modelChain")
    ##3: the final segment normalizes with simplemax.
    self.assertEqual(
        segmentation.Segment[-1].RegressionModel.normalizationMethod,
        "simplemax")
    ##4: every per-class segment uses logit and keeps its intercept.
    for idx in range(n_classes):
        reg = segmentation.Segment[idx].RegressionModel
        self.assertEqual(reg.normalizationMethod, "logit")
        self.assertEqual("{:.16f}".format(clf.intercept_[idx]),
                         "{:.16f}".format(reg.RegressionTable[0].intercept))
def test_03_encoded_script(self):
    """A base64-encoded script embedded via script_args round-trips, and
    the exported network keeps all layers.
    """
    # FIX: close file handles deterministically instead of leaking them.
    with open("nyoka/tests/preprocess.py", 'r') as fh:
        script_content = fh.read()
    pmml_obj = KerasToPmml(self.model_final,
                           dataSet='image',
                           predictedClasses=['cat', 'dog'],
                           script_args={
                               "content": script_content,
                               "def_name": "getBase64EncodedString",
                               "return_type": "string",
                               "encode": True
                           })
    with open("script_with_keras.pmml", 'w') as fh:
        pmml_obj.export(fh, 0)
    self.assertEqual(os.path.isfile("script_with_keras.pmml"), True)
    reconPmmlObj = pml.parse("script_with_keras.pmml", True)
    encoded = reconPmmlObj.TransformationDictionary.DefineFunction[
        0].Apply.Extension[0].anytypeobjs_[0]
    content = base64.b64decode(encoded).decode()
    self.assertEqual(script_content, content)
    self.assertEqual(len(self.model_final.layers),
                     len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
def get_outer_segmentation(model, derived_col_names, col_names, target_name,
                           mining_imp_val, categoric_values, model_name):
    """Build the outermost Segmentation element for an LGB model.

    Parameters
    ----------
    model :
        LGB model object.
    derived_col_names : List
        Column names after preprocessing.
    col_names : List
        Feature/column names.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        Mining attributes, strategy and impute values.
    categoric_values : tuple
        Categorical attribute names and their values.
    model_name : string
        Name of the model.

    Returns
    -------
    The outermost Segmentation of the LGB model.
    """
    # For regressors, get_segments already returns a complete Segmentation;
    # classifiers need their segments wrapped with a combination method.
    if 'LGBMRegressor' in str(model.__class__):
        return get_segments(model, derived_col_names, col_names, target_name,
                            mining_imp_val, categoric_values, model_name)
    method = get_multiple_model_method(model)
    inner_segments = get_segments(model, derived_col_names, col_names,
                                  target_name, mining_imp_val,
                                  categoric_values, model_name)
    return pml.Segmentation(multipleModelMethod=method,
                            Segment=inner_segments)
def get_segments_for_lgbr(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categorical_values):
    """Build the Segmentation element for an LGBM regressor.

    Parameters
    ----------
    model :
        LGB model object.
    derived_col_names : List
        Column names after preprocessing.
    feature_names : List
        Feature/column names.
    target_name : String
        Name of the target column (unused here; kept for signature parity).
    mining_imp_val : tuple
        Mining attributes, strategy and impute values (unused here).
    categorical_values : tuple
        Categorical attribute names and their values (unused here).

    Returns
    -------
    segmentation :
        A Segmentation summing one TreeModel segment per boosted tree.
    """
    # FIX: removed an unused `segments = list()` local and corrected the
    # docstring (it documented `categoric_values`, `target_name : List`).
    lgb_dump = model.booster_.dump_model()
    # One root node ('tree_structure') per boosted tree in the dump.
    tree_roots = [info['tree_structure'] for info in lgb_dump['tree_info']]
    segmentation = pml.Segmentation(
        multipleModelMethod=MULTIPLE_MODEL_METHOD.SUM,
        Segment=generate_Segments_Equal_To_Estimators(tree_roots,
                                                      derived_col_names,
                                                      feature_names))
    return segmentation
def test_sklearn_04(self):
    """GradientBoosting on Titanic with label encoding exports as a
    two-segment modelChain ending in a logit regression."""
    titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
    titanic['Embarked'] = titanic['Embarked'].fillna('S')
    drop_cols = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived']
    feature_cols = list(titanic.columns.drop(drop_cols))
    label = 'Survived'
    out_file = "gb_pmml.pmml"
    mapper = DataFrameMapper([(['Sex'], LabelEncoder()),
                              (['Embarked'], LabelEncoder())])
    pipe = Pipeline([
        ("mapping", mapper),
        ("imp", Imputer(strategy="median")),
        ("gbc", GradientBoostingClassifier(n_estimators=10)),
    ])
    pipe.fit(titanic[feature_cols], titanic[label])
    skl_to_pmml(pipe, feature_cols, label, out_file)
    recon = pml.parse(out_file, True)
    segmentation = recon.MiningModel[0].Segmentation
    ##1: GBC is encoded as a model chain.
    self.assertEqual(segmentation.multipleModelMethod, "modelChain")
    ##2: the tree ensemble plus the aggregating regression segment.
    self.assertEqual(len(segmentation.Segment), 2)
    ##3: the final segment normalizes with logit.
    self.assertEqual(
        segmentation.Segment[1].RegressionModel.normalizationMethod, "logit")
def generate_Segments_Equal_To_Estimators(val, derived_col_names, col_names):
    """Build one TreeModel Segment per estimator of the model.

    Parameters
    ----------
    val : List
        Tree root nodes in json format, one per estimator.
    derived_col_names : List
        Column names after preprocessing (used for split predicates).
    col_names : List
        Feature/column names for each segment's MiningSchema.

    Returns
    -------
    segments_equal_to_estimators :
        List of Segment elements, one per estimator, with 1-based ids.
    """
    # Cleanup: enumerate instead of range(len(...)), dropped the redundant
    # (and misspelled) `mining_field_for_innner_segments` alias and the
    # stray double parentheses around the appended Segment.
    segments_equal_to_estimators = []
    for idx, tree_root in enumerate(val):
        main_node = pml.Node(True_=pml.True_())
        create_node(tree_root, main_node, derived_col_names)
        m_flds = [pml.MiningField(name=name) for name in col_names]
        segments_equal_to_estimators.append(pml.Segment(
            id=idx + 1,
            True_=pml.True_(),
            TreeModel=pml.TreeModel(
                functionName=MINING_FUNCTION.REGRESSION,
                modelName="DecisionTreeModel",
                missingValueStrategy="none",
                noTrueChildStrategy="returnLastPrediction",
                splitCharacteristic=TREE_SPLIT_CHARACTERISTIC.MULTI,
                Node=main_node,
                MiningSchema=pml.MiningSchema(MiningField=m_flds))))
    return segments_equal_to_estimators
def get_ensemble_models(model, derived_col_names, col_names, target_name,
                        mining_imp_val, categoric_values, model_name):
    """Build the MiningModel element for an LGB model.

    Parameters
    ----------
    model :
        LGB model object.
    derived_col_names : List
        Column names after preprocessing.
    col_names : List
        Feature/column names.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        Mining attributes, strategy and impute values.
    categoric_values : tuple
        Categorical attribute names and their values.
    model_name : string
        Name of the model.

    Returns
    -------
    A single-element list holding the MiningModel for the LGB model.
    """
    model_kwargs = sklToPmml.get_model_kwargs(model, col_names, target_name,
                                              mining_imp_val, categoric_values)
    display_name = model_name if model_name else "LightGBModel"
    segmentation = get_outer_segmentation(model, derived_col_names, col_names,
                                          target_name, mining_imp_val,
                                          categoric_values, model_name)
    mining_model = pml.MiningModel(modelName=display_name,
                                   Segmentation=segmentation,
                                   **model_kwargs)
    return [mining_model]
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values, model_name):
    """
    It returns all the segments of the LGB classifier.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    segments : List
        All the segments of the LGB model: tree-ensemble segment(s)
        followed by a final RegressionModel segment that normalizes the
        raw margin(s) into probabilities.
    """
    segments = list()
    if model.n_classes_ == 2:
        # Binary case: one inner segmentation over all trees, feeding a
        # logistic regression segment.
        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)
        mining_schema_for_1st_segment = xgboostToPmml.mining_Field_For_First_Segment(
            feature_names)
        outputField = list()
        outputField.append(
            pml.OutputField(name="lgbValue",
                            optype=OPTYPE.CONTINUOUS,
                            dataType=DATATYPE.DOUBLE,
                            feature=RESULT_FEATURE.PREDICTED_VALUE,
                            isFinalResult="false"))
        out = pml.Output(OutputField=outputField)
        oField = list()
        oField.append("lgbValue")
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            main_key_value, derived_col_names, feature_names)
        First_segment = xgboostToPmml.add_segmentation(
            model, segments_equal_to_estimators,
            mining_schema_for_1st_segment, out, 1)
        # The last segment turns the summed tree output into a probability
        # via the logistic normalization.
        reg_model = sklToPmml.get_regrs_models(model, oField, oField,
                                               target_name, mining_imp_val,
                                               categoric_values,
                                               model_name)[0]
        reg_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC
        last_segment = pml.Segment(True_=pml.True_(),
                                   id=2,
                                   RegressionModel=reg_model)
        segments.append(First_segment)
        segments.append(last_segment)
    else:
        # Multi-class case: LightGBM interleaves trees across classes
        # (tree j belongs to class j % n_classes_), so trees are regrouped
        # per class into one inner segmentation each, then combined by a
        # softmax regression segment.
        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)
        oField = list()
        for index in range(0, model.n_classes_):
            inner_segment = []
            # Pick every n_classes_-th tree starting at `index`.
            for in_seg in range(index, len(main_key_value), model.n_classes_):
                inner_segment.append(main_key_value[in_seg])
            mining_schema_for_1st_segment = xgboostToPmml.mining_Field_For_First_Segment(
                feature_names)
            outputField = list()
            outputField.append(
                pml.OutputField(name='lgbValue(' + str(index) + ')',
                                optype=OPTYPE.CONTINUOUS,
                                feature=RESULT_FEATURE.PREDICTED_VALUE,
                                dataType=DATATYPE.FLOAT,
                                isFinalResult="true"))
            out = pml.Output(OutputField=outputField)
            oField.append('lgbValue(' + str(index) + ')')
            segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
                inner_segment, derived_col_names, feature_names)
            segments_equal_to_class = xgboostToPmml.add_segmentation(
                model, segments_equal_to_estimators,
                mining_schema_for_1st_segment, out, index)
            segments.append(segments_equal_to_class)
        # Final segment: softmax over the per-class margins.
        reg_model = sklToPmml.get_regrs_models(model, oField, oField,
                                               target_name, mining_imp_val,
                                               categoric_values,
                                               model_name)[0]
        reg_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SOFTMAX
        last_segment = pml.Segment(True_=pml.True_(),
                                   id=model.n_classes_ + 1,
                                   RegressionModel=reg_model)
        segments.append(last_segment)
    return segments
def lgb_to_pmml(pipeline, col_names, target_name, pmml_f_name='from_lgbm.pmml',
                model_name=None, description=None):
    """
    Exports LGBM pipeline object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the target column.
    pmml_f_name : String
        Name of the pmml file. (Default='from_lgbm.pmml')
    model_name : string (optional)
        Name of the model
    description : string (optional)
        Description of the model

    Returns
    -------
    Exports the generated PMML object to `pmml_f_name`
    """
    try:
        model = pipeline.steps[-1][1]
    # FIX: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch only ordinary exceptions.
    except Exception:
        raise TypeError(
            "Exporter expects pipeleine_instance and not an estimator_instance"
        )
    else:
        if col_names.__class__.__name__ == "ndarray":
            col_names = col_names.tolist()
        ppln_sans_predictor = pipeline.steps[:-1]
        trfm_dict_kwargs = dict()
        derived_col_names = col_names
        categoric_values = tuple()
        mining_imp_val = tuple()
        # When preprocessing steps exist, derive transformed column names
        # and collect the transformation dictionary from them.
        if ppln_sans_predictor:
            pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names,
                                           model)
            trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
            derived_col_names = pml_pp['derived_col_names']
            col_names = pml_pp['preprocessed_col_names']
            categoric_values = pml_pp['categorical_feat_values']
            mining_imp_val = pml_pp['mining_imp_values']
        PMML_kwargs = get_PMML_kwargs(model, derived_col_names, col_names,
                                      target_name, mining_imp_val,
                                      categoric_values, model_name)
        pmml = pml.PMML(version=PMML_SCHEMA.VERSION,
                        Header=sklToPmml.get_header(description),
                        DataDictionary=sklToPmml.get_data_dictionary(
                            model, col_names, target_name, categoric_values),
                        **trfm_dict_kwargs,
                        **PMML_kwargs)
        # FIX: close the output file deterministically instead of leaking
        # the handle.
        with open(pmml_f_name, "w") as outfile:
            pmml.export(outfile=outfile, level=0)
def test_lgbm_05(self):
    """Export a binary LGBMClassifier pipeline to PMML and verify that
    the inner tree-ensemble segment reproduces the booster's split
    values and leaf scores.
    """
    iris = datasets.load_iris()
    irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
    # Synthetic alternating binary target to force the binary code path.
    irisd['target'] = [i % 2 for i in range(iris.data.shape[0])]
    features = irisd.columns.drop('target')
    target = 'target'
    f_name = "lgbc_bin_pmml.pmml"
    model = LGBMClassifier()
    pipeline_obj = Pipeline([('lgbmc', model)])
    pipeline_obj.fit(irisd[features], irisd[target])
    lgb_to_pmml(pipeline_obj, features, target, f_name)
    # self.assertEqual(os.path.isfile("lgbc_bin_pmml.pmml"), True)
    pmml_obj = pml.parse(f_name, True)
    pmml_value_list = []
    model_value_list = []
    pmml_score_list = []
    model_score_list = []
    seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
    for seg in seg_tab:
        # Segment id 1 holds the inner MiningModel with the tree ensemble
        # (id 2 is the logistic regression segment, skipped here).
        if int(seg.id) == 1:
            for segment in seg.MiningModel.Segmentation.Segment:
                node_tab = segment.TreeModel.Node.Node
                if not node_tab:
                    # Single-node tree: root itself carries the score.
                    pmml_score_list.append(segment.TreeModel.Node.score)
                else:
                    for node in node_tab:
                        varlen = node.get_Node().__len__()
                        if varlen > 0:
                            # Interior node: record predicate, recurse via
                            # the test helper for the subtree.
                            pmml_value_list.append(
                                node.SimplePredicate.value)
                            self.extractValues(node, pmml_value_list,
                                               pmml_score_list)
                        else:
                            # Leaf: record predicate value and score.
                            pmml_value_list.append(
                                node.SimplePredicate.value)
                            pmml_score_list.append(node.score)
    # Collect the same information straight from the LightGBM booster.
    main_key_value = []
    lgb_dump = model.booster_.dump_model()
    for i in range(len(lgb_dump['tree_info'])):
        tree = lgb_dump['tree_info'][i]['tree_structure']
        main_key_value.append(tree)
    for i in range(len(main_key_value)):
        list_score_temp = []
        list_val_temp = []
        node_list = main_key_value[i]
        self.create_node(node_list, list_score_temp, list_val_temp)
        model_score_list = model_score_list + list_score_temp
        model_value_list = model_value_list + list_val_temp
        list_val_temp.clear()
        list_score_temp.clear()
    ##1 Leaf scores must match (PMML stores them as strings).
    for model_val, pmml_val in zip(model_score_list, pmml_score_list):
        self.assertEqual(model_val, float(pmml_val))
    ##2 Split threshold values must match.
    for model_val, pmml_val in zip(model_value_list, pmml_value_list):
        self.assertEqual(model_val, pmml_val)
    ##3 The PMML file must exist on disk.
    self.assertEqual(os.path.isfile(f_name), True)