示例#1
0
 def __init__(self, pmml):
     """Parse the given PMML file and rebuild an executable model from it.

     Parameters
     ----------
     pmml : str
         Path to the PMML file to reconstruct.
     """
     self.nyoka_pmml = ny.parse(pmml, True)
     self.image_input = None
     self.layer_input = None
     # Pre-set to None before _build_model() runs; kept because the build
     # step may reference self.model - TODO confirm whether this is required.
     self.model = None
     self.layers_outputs = {}
     self.model = self._build_model()
    def test_lgbm_02(self):
        """Round-trip an LGBMRegressor through lgb_to_pmml and verify that
        every tree split value and leaf score in the exported PMML matches
        the LightGBM booster dump."""

        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [
            name for name in auto.columns if name not in ('mpg', 'car name')
        ]
        target_name = 'mpg'
        f_name = "lgbmr_pmml.pmml"
        model = LGBMRegressor()
        pipeline_obj = Pipeline([('lgbmr', model)])

        pipeline_obj.fit(auto[feature_names], auto[target_name])

        lgb_to_pmml(pipeline_obj, feature_names, target_name, f_name)

        pmml_obj = pml.parse(f_name, True)

        # Split values / leaf scores harvested from the exported PMML.
        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        # Walk each segment's tree: inner nodes recurse via extractValues,
        # leaf nodes contribute their score directly.
        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

        # Reference structures come straight from the LightGBM booster dump.
        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)

        for i in range(len(main_key_value)):
            list_score_temp = []
            list_val_temp = []
            node_list = main_key_value[i]
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list = model_score_list + list_score_temp
            model_value_list = model_value_list + list_val_temp
            list_val_temp.clear()
            list_score_temp.clear()

        ##1 Leaf scores agree (PMML stores them as strings).
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2 Split threshold values agree.
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3 The PMML file was actually written.
        self.assertEqual(os.path.isfile(f_name), True)
    def test_sklearn_01(self):
        """Round-trip an SVC through PMML and verify intercepts and gamma."""
        iris = datasets.load_iris()
        iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
        iris_df['Species'] = iris.target

        feature_cols = iris_df.columns.drop('Species')
        label_col = 'Species'
        pmml_file = "svc_pmml.pmml"
        clf = SVC()
        pipe = Pipeline([('svm', clf)])

        pipe.fit(iris_df[feature_cols], iris_df[label_col])
        skl_to_pmml(pipe, feature_cols, label_col, pmml_file)
        recon = pml.parse(pmml_file, True)

        ## 1 - every intercept survives serialisation (16-digit precision)
        machines = recon.SupportVectorMachineModel[0].SupportVectorMachine
        for expected, reconstructed in zip(clf.intercept_, machines):
            self.assertEqual(f"{expected:.16f}",
                             f"{reconstructed.Coefficients.absoluteValue:.16f}")

        ## 2 - RBF gamma is preserved
        svm_model = recon.SupportVectorMachineModel[0]
        self.assertEqual(svm_model.RadialBasisKernelType.gamma, clf._gamma)
    def test_sklearn_03(self):
        """A 100-tree random forest must export one PMML segment per tree."""
        iris = datasets.load_iris()
        iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
        iris_df['Species'] = iris.target

        feature_cols = iris_df.columns.drop('Species')
        label_col = 'Species'
        pmml_file = "rf_pmml.pmml"
        forest = RandomForestClassifier(n_estimators=100)

        preprocessing = DataFrameMapper([
            (['sepal length (cm)', 'sepal width (cm)'], StandardScaler()),
            (['petal length (cm)', 'petal width (cm)'], Imputer())
        ])
        pipe = Pipeline([("mapping", preprocessing), ("rfc", forest)])

        pipe.fit(iris_df[feature_cols], iris_df[label_col])
        skl_to_pmml(pipe, feature_cols, label_col, pmml_file)
        recon = pml.parse(pmml_file, True)

        ## 1 - one segment per estimator
        self.assertEqual(forest.n_estimators,
                         len(recon.MiningModel[0].Segmentation.Segment))

        ##2 - forests combine by voting
        self.assertEqual(recon.MiningModel[0].Segmentation.multipleModelMethod,
                         "majorityVote")
    def test_sklearn_02(self):
        """KNN pipeline: distance measure, kind and k must survive export."""
        iris = datasets.load_iris()
        iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
        iris_df['Species'] = iris.target

        feature_cols = iris_df.columns.drop('Species')
        label_col = 'Species'
        pmml_file = "knn_pmml.pmml"

        knn = KNeighborsClassifier(n_neighbors=5)
        pipe = Pipeline([('scaling', StandardScaler()), ('knn', knn)])

        pipe.fit(iris_df[feature_cols], iris_df[label_col])
        skl_to_pmml(pipe, feature_cols, label_col, pmml_file)
        recon = pml.parse(pmml_file, True)

        knn_model = recon.NearestNeighborModel[0]

        ##1 - euclidean comparison measure is present
        self.assertIsNotNone(knn_model.ComparisonMeasure.euclidean)

        ##2 - the measure is a distance, not a similarity
        self.assertEqual(knn_model.ComparisonMeasure.kind, "distance")

        ##3 - k is preserved
        self.assertEqual(knn.n_neighbors, knn_model.numberOfNeighbors)
    def test_sklearn_04(self):
        """Binary GradientBoosting pipeline exports as a 2-segment modelChain."""
        titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
        # BUG FIX: the target column must not be part of the training
        # features -- the original used `titanic.columns`, which includes
        # 'Survived' and leaks the label into the model.
        features = titanic.columns.drop('Survived')
        target = 'Survived'
        f_name = "gb_pmml.pmml"

        pipeline_obj = Pipeline([
            ("imp", Imputer(strategy="median")),
            ("gbc", GradientBoostingClassifier(n_estimators=10))
        ])

        pipeline_obj.fit(titanic[features], titanic[target])

        skl_to_pmml(pipeline_obj, features, target, f_name)

        pmml_obj = pml.parse(f_name, True)

        ##1 - classifier chains into a final regression stage
        self.assertEqual(pmml_obj.MiningModel[0].Segmentation.multipleModelMethod, "modelChain")

        ##2 - binary target: boosted segment + regression segment
        self.assertEqual(pmml_obj.MiningModel[0].Segmentation.Segment.__len__(), 2)

        ##3 - final stage squashes through logit
        self.assertEqual(pmml_obj.MiningModel[0].Segmentation.Segment[1].RegressionModel.normalizationMethod, "logit")
    def test_sklearn_06(self):
        """Linear regression: intercept and all coefficients must round-trip."""
        df = pd.read_csv('nyoka/tests/auto-mpg.csv')
        X = df.drop(['mpg', 'car name'], axis=1)
        y = df['mpg']

        feature_cols = X.columns
        label_col = 'mpg'
        pmml_file = "linearregression_pmml.pmml"
        regressor = LinearRegression()

        pipe = Pipeline([('model', regressor)])
        pipe.fit(X, y)
        skl_to_pmml(pipe, feature_cols, label_col, pmml_file)
        recon = pml.parse(pmml_file, True)

        ## 1 - intercept is preserved
        regression_table = recon.RegressionModel[0].RegressionTable[0]
        self.assertEqual(regression_table.intercept, regressor.intercept_)

        ## 2 - coefficients compared at 16-digit precision
        for coeff, predictor in zip(regressor.coef_,
                                    regression_table.NumericPredictor):
            self.assertEqual(f"{coeff:.16f}", f"{predictor.coefficient:.16f}")
示例#8
0
    def test_plain_text_script(self):
        """Embed a preprocessing script as plain text in the PMML and verify
        it reads back verbatim, alongside the full layer count."""
        model = applications.MobileNet(weights='imagenet',
                                       include_top=False,
                                       input_shape=(224, 224, 3))
        x = model.output
        x = Flatten()(x)
        x = Dense(1024, activation="relu")(x)
        predictions = Dense(2, activation='sigmoid')(x)
        model_final = Model(inputs=model.input,
                            outputs=predictions,
                            name='predictions')
        # BUG FIX: close the script and output files deterministically
        # instead of leaking the handles returned by bare open() calls.
        with open("nyoka/tests/preprocess.py", 'r') as script_file:
            script_content = script_file.read()
        pmml_obj = KerasToPmml(model_final,
                               dataSet='image',
                               predictedClasses=['cat', 'dog'],
                               script_args={
                                   "content": script_content,
                                   "def_name": "getBase64EncodedString",
                                   "return_type": "string",
                                   "encode": False
                               })
        with open("script_with_keras_plain.pmml", 'w') as out_file:
            pmml_obj.export(out_file, 0)
        self.assertEqual(os.path.isfile("script_with_keras_plain.pmml"), True)
        reconPmmlObj = ny.parse("script_with_keras_plain.pmml", True)
        content = reconPmmlObj.TransformationDictionary.DefineFunction[
            0].Apply.Extension[0].anytypeobjs_
        # Plain-text scripts come back line-wise with a leading tab on the
        # first line; strip and re-join before comparing.
        content[0] = content[0].replace("\t", "")
        content = "\n".join(content)
        self.assertEqual(script_content, content)
        self.assertEqual(len(model_final.layers),
                         len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
示例#9
0
 def test_keras_02(self):
     """A small Sequential regressor exports one NetworkLayer per Keras
     layer plus the implicit PMML input layer."""
     boston = load_boston()
     data = pd.DataFrame(boston.data)
     features = list(boston.feature_names)
     target = 'PRICE'
     data.columns = features
     data['PRICE'] = boston.target
     x_train, x_test, y_train, y_test = train_test_split(data[features],
                                                         data[target],
                                                         test_size=0.20,
                                                         random_state=42)
     model = Sequential()
     model.add(
         Dense(13,
               input_dim=13,
               kernel_initializer='normal',
               activation='relu'))
     model.add(Dense(23))
     model.add(Dense(1, kernel_initializer='normal'))
     model.compile(loss='mean_squared_error', optimizer='adam')
     model.fit(x_train, y_train, epochs=1000, verbose=0)
     pmmlObj = KerasToPmml(model)
     # BUG FIX: close the output file instead of leaking the handle.
     with open('sequentialModel.pmml', 'w') as out_file:
         pmmlObj.export(out_file, 0)
     reconPmmlObj = ny.parse('sequentialModel.pmml', True)
     self.assertEqual(os.path.isfile("sequentialModel.pmml"), True)
     # The exported DeepNetwork carries an extra input layer, hence the -1.
     self.assertEqual(len(model.layers),
                      len(reconPmmlObj.DeepNetwork[0].NetworkLayer) - 1)
示例#10
0
    def test_xgboost_02(self):
        """Round-trip an XGBRegressor through xgboost_to_pmml and compare
        every split value and leaf score against the booster's JSON dump."""
        auto = pd.read_csv('nyoka/tests/auto-mpg.csv')
        feature_names = [
            name for name in auto.columns if name not in ('mpg', 'car name')
        ]
        target_name = 'mpg'
        f_name = "xgbr_pmml.pmml"
        model = XGBRegressor()
        pipeline_obj = Pipeline([('xgbr', model)])

        pipeline_obj.fit(auto[feature_names], auto[target_name])
        xgboost_to_pmml(pipeline_obj,
                        feature_names,
                        target_name,
                        f_name,
                        description="A test model")
        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []

        pmml_score_list = []
        model_score_list = []

        # Harvest split values and leaf scores from the exported PMML:
        # inner nodes recurse via extractValues, leaves contribute a score.
        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            for node in seg.TreeModel.Node.Node:
                varlen = node.get_Node().__len__()
                if varlen > 0:
                    pmml_value_list.append(node.SimplePredicate.value)
                    self.extractValues(node, pmml_value_list, pmml_score_list)
                else:
                    pmml_value_list.append(node.SimplePredicate.value)
                    pmml_score_list.append(node.score)

        # PERF FIX: get_dump() serialises the entire booster, so call it
        # once instead of once per estimator inside the loop.
        dumped_trees = model._Booster.get_dump(dump_format='json')
        get_nodes_in_json_format = [
            json.loads(dumped_trees[i]) for i in range(model.n_estimators)
        ]

        for node_list in get_nodes_in_json_format:
            list_score_temp = []
            list_val_temp = []
            self.create_node(node_list, list_score_temp, list_val_temp)
            model_score_list.extend(list_score_temp)
            model_value_list.extend(list_val_temp)

        ##1 - leaf scores agree (PMML stores them as strings)
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2 - split threshold values agree
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3 - the PMML file was actually written
        self.assertEqual(os.path.isfile(f_name), True)
示例#11
0
    def test_keras_01(self):
        """Exported MobileNet classifier keeps one NetworkLayer per layer."""
        cnn_pmml = KerasToPmml(self.model_final, model_name="MobileNet",
                               description="Demo", copyright="Internal User",
                               dataSet='image',
                               predictedClasses=['cats', 'dogs'])
        # BUG FIX: close the output file instead of leaking the handle.
        with open('2classMBNet.pmml', "w") as out_file:
            cnn_pmml.export(out_file, 0)
        reconPmmlObj = ny.parse('2classMBNet.pmml', True)
        self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
        self.assertEqual(len(self.model_final.layers),
                         len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
示例#12
0
    def get_output(self):
        """
        Build the Output element describing RetinaNet predictions.

        Returns
        -------
        Nyoka's Output object
        """
        # The single output field carries label/box/score as a JSON string;
        # the ADAPA extension records that format.
        json_format = pml.Extension(extender="ADAPA",
                                    name="format",
                                    value="JSON")
        label_box_score = pml.OutputField(name="predicted_LabelBoxScore",
                                          dataType="string",
                                          feature="predictedValue",
                                          Extension=[json_format])
        return pml.Output(OutputField=[label_box_score])
示例#13
0
    def get_training_parameter(self):
        """
        Build the TrainingParameters element naming the RetinaNet architecture.

        Returns
        -------
        Nyoka's TrainingParameters object
        """
        return pml.TrainingParameters(architectureName='retinanet')
示例#14
0
 def _nyoka_rule_set_model(
         self,
         rule_set_model: models.RuleSetModel) -> nyoka_pmml.RuleSetModel:
     """Translate an internal RuleSetModel into its Nyoka PMML counterpart.

     Optional sub-elements are passed through as None when absent.
     """
     mining_schema = None
     if rule_set_model.miningSchema is not None:
         mining_schema = self._nyoka_mining_schema(rule_set_model.miningSchema)
     rule_set = None
     if rule_set_model.ruleSet is not None:
         rule_set = self._nyoka_rule_set(rule_set_model.ruleSet)
     return nyoka_pmml.RuleSetModel(functionName='classification',
                                    algorithmName='RuleSet',
                                    MiningSchema=mining_schema,
                                    RuleSet=rule_set)
示例#15
0
    def get_local_transformation(self):
        """
        Build the LocalTransformations element for RetinaNet: a derived
        field that converts the buffered input into a base64 string.

        Returns
        -------
        Nyoka's LocalTransformations object
        """
        # mobilenet/densenet backbones use 'tf' preprocessing; every other
        # backbone uses 'caffe'.
        preprocessing_mode = ('tf' if self.backbone_name in
                              ['mobilenet', 'densenet'] else 'caffe')
        apply_fn = pml.Apply(
            function='KerasRetinaNet:getBase64StringFromBufferedInput',
            FieldRef=[pml.FieldRef(field=self.input_format)],
            Constant=[pml.Constant(valueOf_=preprocessing_mode)])
        derived = pml.DerivedField(name="base64String",
                                   optype="categorical",
                                   dataType="string",
                                   Apply=apply_fn)
        return pml.LocalTransformations(DerivedField=[derived])
示例#16
0
 def _nyoka_pmml_model(
     self, simple_pmml_ruleset_model: models.SimplePMMLRuleSetModel
 ) -> nyoka_pmml.PMML:
     """Assemble the top-level Nyoka PMML document for the given model.

     Uses the serializer's fixed timestamp when one was supplied, otherwise
     the current time; optional sub-elements are emitted as None when absent.
     """
     if self._timestamp is None:
         timestamp = datetime.datetime.now()
     else:
         timestamp = self._timestamp
     header = nyoka_pmml.Header(
         copyright=NyokaSerializer.COPYRIGHT_STRING,
         description=nyoka_constants.HEADER_INFO.DEFAULT_DESCRIPTION,
         Timestamp=nyoka_pmml.Timestamp(timestamp),
         Application=nyoka_pmml.Application(
             name=NyokaSerializer.APPLICATION_NAME,
             version=version.version))
     data_dictionary = None
     if simple_pmml_ruleset_model.dataDictionary is not None:
         data_dictionary = self._nyoka_data_dictionary(
             simple_pmml_ruleset_model.dataDictionary)
     rule_set_models = None
     if simple_pmml_ruleset_model.ruleSetModel is not None:
         rule_set_models = [
             self._nyoka_rule_set_model(simple_pmml_ruleset_model.ruleSetModel)
         ]
     return nyoka_pmml.PMML(version=nyoka_constants.PMML_SCHEMA.VERSION,
                            Header=header,
                            DataDictionary=data_dictionary,
                            RuleSetModel=rule_set_models)
示例#17
0
    def test_keras_01(self):
        """Build a two-class MobileNet head and round-trip it through PMML."""
        model = applications.MobileNet(weights='imagenet',
                                       include_top=False,
                                       input_shape=(224, 224, 3))
        activType = 'sigmoid'
        x = model.output
        x = Flatten()(x)
        x = Dense(1024, activation="relu")(x)
        predictions = Dense(2, activation=activType)(x)
        model_final = Model(inputs=model.input,
                            outputs=predictions,
                            name='predictions')
        cnn_pmml = KerasToPmml(model_final, model_name="MobileNet",
                               description="Demo", copyright="Internal User",
                               dataSet='image',
                               predictedClasses=['cats', 'dogs'])
        # BUG FIX: close the output file instead of leaking the handle.
        with open('2classMBNet.pmml', "w") as out_file:
            cnn_pmml.export(out_file, 0)
        reconPmmlObj = ny.parse('2classMBNet.pmml', True)
        self.assertEqual(os.path.isfile("2classMBNet.pmml"), True)
        self.assertEqual(len(model_final.layers),
                         len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
示例#18
0
 def test_construction_vgg(self):
     """VGG16 with a custom head must keep one NetworkLayer per layer."""
     model = applications.VGG16(weights="imagenet",
                                include_top=False,
                                input_shape=(224, 224, 3))
     x = model.output
     x = layers.Flatten()(x)
     x = layers.Dense(1024, activation="relu")(x)
     x = layers.Dropout(0.5)(x)
     x = layers.Dense(1024, activation="relu")(x)
     predictions = layers.Dense(2, activation="softmax")(x)
     # BUG FIX: the functional Model constructor takes `inputs`/`outputs`;
     # the singular `input=`/`output=` kwargs are rejected by current Keras.
     # This also matches the other tests in this file.
     model_final = models.Model(inputs=model.input, outputs=predictions)
     model_final.compile(loss="binary_crossentropy",
                         optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
                         metrics=["accuracy"])
     pmmlObj = KerasToPmml(model_final, model_name="VGG16", dataSet='image')
     # Close the output file deterministically rather than leaking it.
     with open('vgg.pmml', 'w') as out_file:
         pmmlObj.export(out_file, 0)
     reconPmmlObj = ny.parse('vgg.pmml', True)
     self.assertEqual(os.path.isfile("vgg.pmml"), True)
     self.assertEqual(len(model_final.layers),
                      len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
示例#19
0
    def test_02(self):
        """MobileNet-backbone RetinaNet export carries 'tf' preprocessing."""
        backbone = 'mobilenet'
        RetinanetToPmml(self.model,
                        input_shape=(224, 224, 3),
                        backbone_name=backbone,
                        pmml_file_name="retinanet_with_coco_2.pmml")
        recon_pmml_obj = pml.parse("retinanet_with_coco_2.pmml", True)

        # The input DataField is flagged as binary-buffered.
        data_field = recon_pmml_obj.DataDictionary.DataField[0]
        self.assertEqual(data_field.Extension[0].value, 'true')

        derived_field = recon_pmml_obj.DeepNetwork[
            0].LocalTransformations.DerivedField[0]
        self.assertEqual(derived_field.Apply.function,
                         'KerasRetinaNet:getBase64StringFromBufferedInput')
        self.assertEqual(derived_field.Apply.Constant[0].valueOf_, 'tf')
示例#20
0
 def test_05(self):
     """A base64-encoded script attached to a RetinaNet export must decode
     back to the original source."""
     backbone = 'resnet'
     # BUG FIX: close the script file instead of leaking the handle
     # returned by a bare open() call.
     with open("nyoka/tests/preprocess.py", 'r') as script_file:
         script_content = script_file.read()
     RetinanetToPmml(self.model,
                     input_shape=(224, 224, 3),
                     backbone_name=backbone,
                     pmml_file_name="retinanet_with_coco_2.pmml",
                     script_args={
                         "content": script_content,
                         "def_name": "getBase64EncodedString",
                         "return_type": "string",
                         "encode": True
                     })
     recon_pmml_obj = pml.parse("retinanet_with_coco_2.pmml", True)
     content = recon_pmml_obj.TransformationDictionary.DefineFunction[
         0].Apply.Extension[0].anytypeobjs_[0]
     content = base64.b64decode(content).decode()
     self.assertEqual(script_content, content)
示例#21
0
    def test_sklearn_07(self):
        """Multiclass LogisticRegression exports one logit segment per class
        plus a final simplemax regression segment."""
        iris = datasets.load_iris()
        iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
        iris_df['Species'] = iris.target

        feature_cols = iris_df.columns.drop('Species')
        label_col = 'Species'
        pmml_file = "logisticregression_pmml.pmml"
        clf = LogisticRegression()

        preprocessing = DataFrameMapper([
            (['sepal length (cm)', 'sepal width (cm)'], StandardScaler()),
            (['petal length (cm)', 'petal width (cm)'], Imputer())
        ])
        pipe = Pipeline([("mapping", preprocessing), ("lr", clf)])

        pipe.fit(iris_df[feature_cols], iris_df[label_col])
        skl_to_pmml(pipe, feature_cols, label_col, pmml_file)
        recon = pml.parse(pmml_file, True)

        segmentation = recon.MiningModel[0].Segmentation
        n_classes = len(clf.classes_)

        ## 1 - one segment per class plus the final normalising segment
        self.assertEqual(len(segmentation.Segment), n_classes + 1)

        ## 2 - segments are chained
        self.assertEqual(segmentation.multipleModelMethod, "modelChain")

        ##3 - the last segment normalises via simplemax
        self.assertEqual(
            segmentation.Segment[-1].RegressionModel.normalizationMethod,
            "simplemax")

        ##4 - per-class segments use logit and keep their intercepts
        for idx in range(n_classes):
            regression = segmentation.Segment[idx].RegressionModel
            self.assertEqual(regression.normalizationMethod, "logit")
            self.assertEqual(
                f"{clf.intercept_[idx]:.16f}",
                f"{regression.RegressionTable[0].intercept:.16f}")
示例#22
0
 def test_03_encoded_script(self):
     """A base64-encoded preprocessing script embedded in the PMML must
     decode back to the original source, and the layer count must match."""
     # BUG FIX: close both files deterministically instead of leaking the
     # handles returned by bare open() calls.
     with open("nyoka/tests/preprocess.py", 'r') as script_file:
         script_content = script_file.read()
     pmml_obj = KerasToPmml(self.model_final,
                            dataSet='image',
                            predictedClasses=['cat', 'dog'],
                            script_args={
                                "content": script_content,
                                "def_name": "getBase64EncodedString",
                                "return_type": "string",
                                "encode": True
                            })
     with open("script_with_keras.pmml", 'w') as out_file:
         pmml_obj.export(out_file, 0)
     self.assertEqual(os.path.isfile("script_with_keras.pmml"), True)
     reconPmmlObj = pml.parse("script_with_keras.pmml", True)
     content = reconPmmlObj.TransformationDictionary.DefineFunction[
         0].Apply.Extension[0].anytypeobjs_[0]
     content = base64.b64decode(content).decode()
     self.assertEqual(script_content, content)
     self.assertEqual(len(self.model_final.layers),
                      len(reconPmmlObj.DeepNetwork[0].NetworkLayer))
示例#23
0
def get_outer_segmentation(model, derived_col_names, col_names, target_name,
                           mining_imp_val, categoric_values, model_name):
    """
    Build the outermost Segmentation element of an LGB model.

    Parameters
    ----------
    model :
        LGB model object.
    derived_col_names : List
        Column names after preprocessing.
    col_names : List
        Feature/column names.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        mining_attributes, mining_strategy and mining_impute_value.
    categoric_values : tuple
        Categorical attribute names and their values.
    model_name : string
        Name of the model.

    Returns
    -------
    segmentation :
        The outermost Segmentation of the LGB model (for regressors the
        result of get_segments is returned directly).
    """
    inner = get_segments(model, derived_col_names, col_names, target_name,
                         mining_imp_val, categoric_values, model_name)
    # Regressors expose the segments as-is; classifiers additionally need a
    # Segmentation wrapper recording how the sub-models are combined.
    if 'LGBMRegressor' in str(model.__class__):
        return inner
    return pml.Segmentation(
        multipleModelMethod=get_multiple_model_method(model),
        Segment=inner)
示例#24
0
def get_segments_for_lgbr(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categorical_values):
    """
    Build the Segmentation element for an LGBMRegressor.

    Parameters
    ----------
    model :
        LGB model object.
    derived_col_names : List
        Column names after preprocessing.
    feature_names : List
        Feature/column names.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        mining_attributes, mining_strategy and mining_impute_value.
    categorical_values : tuple
        Categorical attribute names and their values.

    Returns
    -------
    segmentation :
        Segmentation element whose per-tree segments are combined by sum.
    """
    # One segment per boosted tree, harvested from the booster dump.
    # (The unused `segments` list from the original was removed.)
    lgb_dump = model.booster_.dump_model()
    main_key_value = [
        tree_info['tree_structure'] for tree_info in lgb_dump['tree_info']
    ]
    return pml.Segmentation(
        multipleModelMethod=MULTIPLE_MODEL_METHOD.SUM,
        Segment=generate_Segments_Equal_To_Estimators(main_key_value,
                                                      derived_col_names,
                                                      feature_names))
示例#25
0
    def test_sklearn_04(self):
        """Titanic GradientBoosting pipeline exports as a 2-segment modelChain."""
        titanic = pd.read_csv("nyoka/tests/titanic_train.csv")
        titanic['Embarked'] = titanic['Embarked'].fillna('S')

        features = list(
            titanic.columns.drop(
                ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived']))
        target = 'Survived'
        f_name = "gb_pmml.pmml"

        encoder = DataFrameMapper([(['Sex'], LabelEncoder()),
                                   (['Embarked'], LabelEncoder())])
        pipeline_obj = Pipeline([
            ("mapping", encoder),
            ("imp", Imputer(strategy="median")),
            ("gbc", GradientBoostingClassifier(n_estimators=10))
        ])

        pipeline_obj.fit(titanic[features], titanic[target])
        skl_to_pmml(pipeline_obj, features, target, f_name)
        pmml_obj = pml.parse(f_name, True)

        segmentation = pmml_obj.MiningModel[0].Segmentation

        ##1 - classifier chains into a final regression stage
        self.assertEqual(segmentation.multipleModelMethod, "modelChain")

        ##2 - binary target: boosted segment + regression segment
        self.assertEqual(len(segmentation.Segment), 2)

        ##3 - the final stage squashes through logit
        self.assertEqual(
            segmentation.Segment[1].RegressionModel.normalizationMethod,
            "logit")
示例#26
0
def generate_Segments_Equal_To_Estimators(val, derived_col_names, col_names):
    """
    Build one Segment (wrapping a regression TreeModel) per estimator.

    Parameters
    ----------
    val : List
        Tree structures, one per estimator, in json/dict format.
    derived_col_names : List
        Column names after preprocessing.
    col_names : List
        Feature/column names.

    Returns
    -------
    segments_equal_to_estimators : List
        One pml.Segment per entry of `val`, with ids starting at 1.
    """
    segments_equal_to_estimators = []
    for seg_id, tree in enumerate(val, start=1):
        root_node = pml.Node(True_=pml.True_())
        # create_node populates root_node from the dumped tree structure.
        create_node(tree, root_node, derived_col_names)
        mining_fields = [pml.MiningField(name=name) for name in col_names]
        segments_equal_to_estimators.append(pml.Segment(
            id=seg_id,
            True_=pml.True_(),
            TreeModel=pml.TreeModel(
                functionName=MINING_FUNCTION.REGRESSION,
                modelName="DecisionTreeModel",
                missingValueStrategy="none",
                noTrueChildStrategy="returnLastPrediction",
                splitCharacteristic=TREE_SPLIT_CHARACTERISTIC.MULTI,
                Node=root_node,
                MiningSchema=pml.MiningSchema(MiningField=mining_fields))))
    return segments_equal_to_estimators
示例#27
0
def get_ensemble_models(model, derived_col_names, col_names, target_name,
                        mining_imp_val, categoric_values, model_name):
    """
    Build the MiningModel element for an LGB model.

    Parameters
    ----------
    model :
        LGB model object.
    derived_col_names : List
        Column names after preprocessing.
    col_names : List
        Feature/column names.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        mining_attributes, mining_strategy and mining_impute_value.
    categoric_values : tuple
        Categorical attribute names and their values.
    model_name : string
        Name of the model.

    Returns
    -------
    A single-element list holding the MiningModel for the given LGB model.
    """
    model_kwargs = sklToPmml.get_model_kwargs(model, col_names, target_name,
                                              mining_imp_val, categoric_values)
    segmentation = get_outer_segmentation(model, derived_col_names, col_names,
                                          target_name, mining_imp_val,
                                          categoric_values, model_name)
    mining_model = pml.MiningModel(
        modelName=model_name if model_name else "LightGBModel",
        Segmentation=segmentation,
        **model_kwargs)
    return [mining_model]
示例#28
0
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values, model_name):
    """
    It returns all the segments of the LGB classifier.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    regrs_models :
        Returns all the segments of the LGB model.
    """
    segments = list()

    # Binary classification: one tree-ensemble segment followed by a
    # logistic-regression segment that squashes its raw margin.
    if model.n_classes_ == 2:
        main_key_value = []
        # Collect every boosted tree structure from the booster dump.
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)
        mining_schema_for_1st_segment = xgboostToPmml.mining_Field_For_First_Segment(
            feature_names)
        outputField = list()
        # Intermediate (non-final) value consumed by the regression stage.
        outputField.append(
            pml.OutputField(name="lgbValue",
                            optype=OPTYPE.CONTINUOUS,
                            dataType=DATATYPE.DOUBLE,
                            feature=RESULT_FEATURE.PREDICTED_VALUE,
                            isFinalResult="false"))
        out = pml.Output(OutputField=outputField)
        oField = list()
        oField.append("lgbValue")
        segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
            main_key_value, derived_col_names, feature_names)
        First_segment = xgboostToPmml.add_segmentation(
            model, segments_equal_to_estimators, mining_schema_for_1st_segment,
            out, 1)
        reg_model = sklToPmml.get_regrs_models(model, oField, oField,
                                               target_name, mining_imp_val,
                                               categoric_values, model_name)[0]
        reg_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC
        last_segment = pml.Segment(True_=pml.True_(),
                                   id=2,
                                   RegressionModel=reg_model)
        segments.append(First_segment)

        segments.append(last_segment)
    else:
        # Multiclass: the code assumes LightGBM stores its trees round-robin
        # per class (tree j belongs to class j % n_classes - TODO confirm).
        # One segment is emitted per class, then a softmax regression
        # segment combines the per-class margins.
        main_key_value = []
        lgb_dump = model.booster_.dump_model()
        for i in range(len(lgb_dump['tree_info'])):
            tree = lgb_dump['tree_info'][i]['tree_structure']
            main_key_value.append(tree)
        oField = list()
        for index in range(0, model.n_classes_):
            inner_segment = []
            # Pick every n_classes-th tree starting at `index`.
            for in_seg in range(index, len(main_key_value), model.n_classes_):
                inner_segment.append(main_key_value[in_seg])
            mining_schema_for_1st_segment = xgboostToPmml.mining_Field_For_First_Segment(
                feature_names)
            outputField = list()
            outputField.append(
                pml.OutputField(name='lgbValue(' + str(index) + ')',
                                optype=OPTYPE.CONTINUOUS,
                                feature=RESULT_FEATURE.PREDICTED_VALUE,
                                dataType=DATATYPE.FLOAT,
                                isFinalResult="true"))
            out = pml.Output(OutputField=outputField)

            oField.append('lgbValue(' + str(index) + ')')
            segments_equal_to_estimators = generate_Segments_Equal_To_Estimators(
                inner_segment, derived_col_names, feature_names)
            segments_equal_to_class = xgboostToPmml.add_segmentation(
                model, segments_equal_to_estimators,
                mining_schema_for_1st_segment, out, index)
            segments.append(segments_equal_to_class)
        reg_model = sklToPmml.get_regrs_models(model, oField, oField,
                                               target_name, mining_imp_val,
                                               categoric_values, model_name)[0]
        reg_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SOFTMAX
        last_segment = pml.Segment(True_=pml.True_(),
                                   id=model.n_classes_ + 1,
                                   RegressionModel=reg_model)
        segments.append(last_segment)
    return segments
示例#29
0
def lgb_to_pmml(pipeline,
                col_names,
                target_name,
                pmml_f_name='from_lgbm.pmml',
                model_name=None,
                description=None):
    """
    Exports LGBM pipeline object into pmml

    Parameters
    ----------
    pipeline :
        Contains an instance of Pipeline with preprocessing and final estimator
    col_names : List
        Contains list of feature/column names.
    target_name : String
        Name of the target column.
    pmml_f_name : String
        Name of the pmml file. (Default='from_lgbm.pmml')
    model_name : string (optional)
        Name of the model
    description : string (optional)
        Description of the model

    Returns
    -------
    Exports the generated PMML object to `pmml_f_name`

    Raises
    ------
    TypeError
        If `pipeline` does not look like a Pipeline instance (i.e. the final
        estimator cannot be extracted from `pipeline.steps`).
    """
    try:
        # The final step of the pipeline is the LGBM estimator itself.
        model = pipeline.steps[-1][1]
    except (AttributeError, IndexError, TypeError):
        # Narrowed from a bare `except:` — only "not a pipeline" failure
        # modes are mapped to TypeError; anything else propagates.
        raise TypeError(
            "Exporter expects pipeline_instance and not an estimator_instance"
        )
    else:
        # Duck-typed ndarray check avoids a hard numpy dependency here.
        if col_names.__class__.__name__ == "ndarray":
            col_names = col_names.tolist()
        ppln_sans_predictor = pipeline.steps[:-1]
        trfm_dict_kwargs = dict()
        derived_col_names = col_names
        categoric_values = tuple()
        mining_imp_val = tuple()
        if ppln_sans_predictor:
            # Pre-processing steps present: collect their transformation
            # dictionary and the derived/preprocessed column information.
            pml_pp = pp.get_preprocess_val(ppln_sans_predictor, col_names,
                                           model)
            trfm_dict_kwargs['TransformationDictionary'] = pml_pp['trfm_dict']
            derived_col_names = pml_pp['derived_col_names']
            col_names = pml_pp['preprocessed_col_names']
            categoric_values = pml_pp['categorical_feat_values']
            mining_imp_val = pml_pp['mining_imp_values']
        PMML_kwargs = get_PMML_kwargs(model, derived_col_names, col_names,
                                      target_name, mining_imp_val,
                                      categoric_values, model_name)
        pmml = pml.PMML(version=PMML_SCHEMA.VERSION,
                        Header=sklToPmml.get_header(description),
                        DataDictionary=sklToPmml.get_data_dictionary(
                            model, col_names, target_name, categoric_values),
                        **trfm_dict_kwargs,
                        **PMML_kwargs)
        # Context manager guarantees the output file handle is closed
        # (the original leaked the handle returned by open()).
        with open(pmml_f_name, "w") as outfile:
            pmml.export(outfile=outfile, level=0)
    def test_lgbm_05(self):
        """Binary LGBMClassifier round-trip: split values and leaf scores in
        the exported PMML must match the booster's own tree dump, and the
        PMML file must exist on disk."""
        iris = datasets.load_iris()
        irisd = pd.DataFrame(iris.data, columns=iris.feature_names)
        # Synthetic alternating 0/1 target turns iris into a binary problem.
        irisd['target'] = [i % 2 for i in range(iris.data.shape[0])]

        features = irisd.columns.drop('target')
        target = 'target'
        f_name = "lgbc_bin_pmml.pmml"
        model = LGBMClassifier()
        pipeline_obj = Pipeline([('lgbmc', model)])

        pipeline_obj.fit(irisd[features], irisd[target])

        lgb_to_pmml(pipeline_obj, features, target, f_name)

        pmml_obj = pml.parse(f_name, True)

        pmml_value_list = []
        model_value_list = []
        pmml_score_list = []
        model_score_list = []

        # Walk the first segment (the tree ensemble) of the exported PMML and
        # collect every split value and leaf score in document order.
        seg_tab = pmml_obj.MiningModel[0].Segmentation.Segment
        for seg in seg_tab:
            if int(seg.id) == 1:
                for segment in seg.MiningModel.Segmentation.Segment:
                    node_tab = segment.TreeModel.Node.Node
                    if not node_tab:
                        # Single-node tree: the root itself carries the score.
                        pmml_score_list.append(segment.TreeModel.Node.score)
                    else:
                        for node in node_tab:
                            # Split value is recorded for every node; only
                            # internal nodes recurse via extractValues.
                            pmml_value_list.append(node.SimplePredicate.value)
                            if len(node.get_Node()) > 0:
                                self.extractValues(node, pmml_value_list,
                                                   pmml_score_list)
                            else:
                                pmml_score_list.append(node.score)

        # Collect the same information straight from the LightGBM booster.
        lgb_dump = model.booster_.dump_model()
        main_key_value = [info['tree_structure']
                          for info in lgb_dump['tree_info']]

        for node_list in main_key_value:
            list_score_temp = []
            list_val_temp = []
            self.create_node(node_list, list_score_temp, list_val_temp)
            # Fresh temporaries each iteration, so no clear() is needed.
            model_score_list.extend(list_score_temp)
            model_value_list.extend(list_val_temp)

        ##1 leaf scores agree (PMML stores them as strings)
        for model_val, pmml_val in zip(model_score_list, pmml_score_list):
            self.assertEqual(model_val, float(pmml_val))

        ##2 split threshold values agree
        for model_val, pmml_val in zip(model_value_list, pmml_value_list):
            self.assertEqual(model_val, pmml_val)

        ##3 the PMML file was actually written
        self.assertEqual(os.path.isfile(f_name), True)