Пример #1
0
    def test_XGBoost(self):
        """Check the PMML building blocks produced for an XGBoost classifier.

        The test exercises, in order: ``get_segments_for_xgbc``,
        ``mining_Field_For_First_Segment``,
        ``generate_Segments_Equal_To_Estimators``, ``add_segmentation`` and
        ``get_regrs_tabl`` from the ``xgb`` module.  Results that several
        assertions inspect are built once and reused instead of being
        recomputed for every assertion.
        """
        model = XGBClassifier()
        # presumably fits ``model`` on the iris data and returns it -- the
        # helper is defined outside this block; TODO confirm.
        sk_model = iris_dataset(model)
        derived_col_names = ['sepal_length', 'petal_length']
        col_names = ['sepal_length', 'petal_length']
        target_name = 'species'
        mining_imp_value = ()
        categoric_values = ()

        # --- get_segments_for_xgbc -------------------------------------
        segments = xgb.get_segments_for_xgbc(
            sk_model, derived_col_names, col_names, target_name,
            mining_imp_value, categoric_values)
        self.assertEqual(len(segments), 4)
        final_segment = segments[3]
        self.assertEqual(final_segment.get_id(), 4)
        self.assertEqual(
            final_segment.get_RegressionModel().get_RegressionTable()[0]
            .get_intercept(),
            '0.0')

        # --- mining_Field_For_First_Segment ----------------------------
        mining_schema = xgb.mining_Field_For_First_Segment(col_names)
        self.assertEqual(
            mining_schema.__class__.__name__,
            pml.MiningSchema().__class__.__name__)
        self.assertEqual(
            mining_schema.get_MiningField()[0].get_name(),
            'sepal_length')
        # One mining field is expected per supplied column name.
        self.assertEqual(
            len(xgb.mining_Field_For_First_Segment(['a', 'b', 'd', 'e'])
                .get_MiningField()),
            4)

        # --- generate_Segments_Equal_To_Estimators ---------------------
        self.assertIsInstance(
            xgb.generate_Segments_Equal_To_Estimators(
                [], derived_col_names, col_names),
            list)

        # --- add_segmentation ------------------------------------------
        # NOTE(review): ``pml.Output`` is passed as the class itself, not an
        # instance -- kept as in the original test; confirm this is intended.
        class_segment = xgb.add_segmentation(sk_model, [], [], pml.Output, 1)
        self.assertEqual(
            class_segment.__class__.__name__,
            pml.Segment().__class__.__name__)
        self.assertEqual(
            class_segment.get_MiningModel().__class__.__name__,
            pml.MiningModel().__class__.__name__)
        # For this model an id argument of 1 yields a segment with id 2.
        self.assertEqual(class_segment.get_id(), 2)

        # --- get_regrs_tabl --------------------------------------------
        self.assertIsInstance(
            xgb.get_regrs_tabl(sk_model, col_names, 'species',
                               categoric_values),
            list)
Пример #2
0
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values, model_name):
    """
    Build the list of PMML segments for a LGB classifier.

    When ``model.n_classes_`` equals 2 the result holds two segments: one
    wrapping all trees (created by ``xgboostToPmml.add_segmentation`` with an
    ``lgbValue`` output field) and a segment with id 2 carrying a regression
    model whose normalization method is set to logistic.  Otherwise one tree
    segment is created per class, each fed with every ``n_classes_``-th tree
    starting at the class index, followed by a segment with id
    ``n_classes_ + 1`` carrying a regression model with softmax
    normalization.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model; forwarded to ``sklToPmml.get_regrs_models``.

    Returns
    -------
    segments : list
        All the segments of the LGB model, tree segments first and the
        regression segment last.
    """
    two_class = model.n_classes_ == 2

    # Raw tree structures, in the order the booster dump lists them.
    tree_structures = [
        tree_info['tree_structure']
        for tree_info in model.booster_.dump_model()['tree_info']
    ]

    if two_class:
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        output = pml.Output(OutputField=[
            pml.OutputField(name="lgbValue",
                            optype=OPTYPE.CONTINUOUS,
                            dataType=DATATYPE.DOUBLE,
                            feature=RESULT_FEATURE.PREDICTED_VALUE,
                            isFinalResult="false")
        ])
        value_fields = ["lgbValue"]
        tree_segments = generate_Segments_Equal_To_Estimators(
            tree_structures, derived_col_names, feature_names)
        ensemble_segment = xgboostToPmml.add_segmentation(
            model, tree_segments, schema, output, 1)

        # The summed tree output is fed to a logistic regression model.
        regression = sklToPmml.get_regrs_models(
            model, value_fields, value_fields, target_name, mining_imp_val,
            categoric_values, model_name)[0]
        regression.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC
        regression_segment = pml.Segment(True_=pml.True_(),
                                         id=2,
                                         RegressionModel=regression)
        return [ensemble_segment, regression_segment]

    segments = []
    value_fields = []
    for class_index in range(0, model.n_classes_):
        # Trees belonging to this class sit at a stride of n_classes_.
        class_trees = tree_structures[class_index::model.n_classes_]
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        field_name = 'lgbValue(' + str(class_index) + ')'
        output = pml.Output(OutputField=[
            pml.OutputField(name=field_name,
                            optype=OPTYPE.CONTINUOUS,
                            feature=RESULT_FEATURE.PREDICTED_VALUE,
                            dataType=DATATYPE.FLOAT,
                            isFinalResult="true")
        ])
        value_fields.append(field_name)
        tree_segments = generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(
            xgboostToPmml.add_segmentation(model, tree_segments, schema,
                                           output, class_index))

    # The per-class values are combined by a softmax regression model.
    regression = sklToPmml.get_regrs_models(
        model, value_fields, value_fields, target_name, mining_imp_val,
        categoric_values, model_name)[0]
    regression.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SOFTMAX
    segments.append(
        pml.Segment(True_=pml.True_(),
                    id=model.n_classes_ + 1,
                    RegressionModel=regression))
    return segments
Пример #3
0
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values):
    """
    Build the list of PMML segments for a LGB classifier.

    Every tree of the booster dump is first converted with
    ``generate_structure_for_lgb``.  When ``model.n_classes_`` equals 2 the
    result holds two segments: one wrapping all trees (created by
    ``xgboostToPmml.add_segmentation`` with an ``lgbValue`` output field) and
    a segment with id 2 carrying the first regression model returned by
    ``sklToPmml.get_regrs_models``.  Otherwise one tree segment is created
    per class, each fed with every ``n_classes_``-th tree starting at the
    class index, followed by a regression segment with id ``n_classes_ + 1``.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values

    Returns
    -------
    segments : list
        All the segments of the LGB model, tree segments first and the
        regression segment last.
    """
    two_class = model.n_classes_ == 2

    # Convert each dumped tree; every call gets its own fresh node list.
    converted_trees = [
        generate_structure_for_lgb(tree_info['tree_structure'], [],
                                   derived_col_names)
        for tree_info in model.booster_.dump_model()['tree_info']
    ]

    if two_class:
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        output = pml.Output(OutputField=[
            pml.OutputField(name="lgbValue",
                            optype="continuous",
                            dataType="float",
                            feature="predictedValue",
                            isFinalResult="true")
        ])
        value_fields = ['lgbValue']
        tree_segments = xgboostToPmml.generate_Segments_Equal_To_Estimators(
            converted_trees, derived_col_names, feature_names)
        ensemble_segment = xgboostToPmml.add_segmentation(
            model, tree_segments, schema, output, 1)

        always_true = pml.True_()
        regression = sklToPmml.get_regrs_models(
            model, value_fields, value_fields, target_name, mining_imp_val,
            categoric_values)[0]
        regression_segment = pml.Segment(True_=always_true,
                                         id=2,
                                         RegressionModel=regression)
        return [ensemble_segment, regression_segment]

    segments = []
    value_fields = []
    for class_index in range(0, model.n_classes_):
        # Trees belonging to this class sit at a stride of n_classes_.
        class_trees = converted_trees[class_index::model.n_classes_]
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        field_name = 'lgbValue(' + str(class_index) + ')'
        output = pml.Output(OutputField=[
            pml.OutputField(name=field_name,
                            optype="continuous",
                            feature="predictedValue",
                            isFinalResult="true")
        ])
        value_fields.append(field_name)
        tree_segments = xgboostToPmml.generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(
            xgboostToPmml.add_segmentation(model, tree_segments, schema,
                                           output, class_index))

    always_true = pml.True_()
    final_id = model.n_classes_ + 1
    regression = sklToPmml.get_regrs_models(
        model, value_fields, value_fields, target_name, mining_imp_val,
        categoric_values)[0]
    segments.append(
        pml.Segment(True_=always_true,
                    id=final_id,
                    RegressionModel=regression))
    return segments