Exemplo n.º 1
0
def generate_Segments_Equal_To_Estimators(val, derived_col_names, col_names):
    """
    Build one PMML segment per estimator (tree) of the model.

    Parameters
    ----------
    val : List
        Tree nodes in json format, one entry per estimator.
    derived_col_names : List
        Column names after preprocessing; forwarded to ``create_node``.
    col_names : List
        Feature/column names; each one becomes a MiningField in the
        MiningSchema of every generated segment.

    Returns
    -------
    segments : List
        One ``pml.Segment`` per entry of ``val``, in the same order, with
        ids numbered from 1. Each segment wraps a regression TreeModel.
    """
    segments = []
    for segment_id, tree in enumerate(val, start=1):
        # create_node is expected to hang the tree's nodes under this root.
        root = pml.Node(True_=pml.True_())
        create_node(tree, root, derived_col_names)

        # Fresh MiningField objects are created for every segment so that
        # no two segments share the same field instances.
        mining_fields = [pml.MiningField(name=column) for column in col_names]

        tree_model = pml.TreeModel(
            functionName=MINING_FUNCTION.REGRESSION,
            modelName="DecisionTreeModel",
            missingValueStrategy="none",
            noTrueChildStrategy="returnLastPrediction",
            splitCharacteristic=TREE_SPLIT_CHARACTERISTIC.MULTI,
            Node=root,
            MiningSchema=pml.MiningSchema(MiningField=mining_fields))

        segments.append(
            pml.Segment(id=segment_id,
                        True_=pml.True_(),
                        TreeModel=tree_model))

    return segments
Exemplo n.º 2
0
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values, model_name):
    """
    Assemble all the segments of the LGB classifier.

    For a binary model (``model.n_classes_ == 2``) every dumped tree goes
    into a single segmentation whose output field is ``lgbValue``; it is
    followed by a regression-model segment (id 2) using logistic
    normalization.

    Otherwise one segmentation is built per class. The dumped trees are
    treated as interleaved by class: the tree at position ``k`` is assigned
    to class ``k % n_classes_``. Each class gets an output field named
    ``lgbValue(<class index>)``, and a final regression-model segment
    (id ``n_classes_ + 1``) uses softmax normalization.

    Parameters
    ----------
    model :
        Contains LGB model object.
    derived_col_names : List
        Contains column names after preprocessing.
    feature_names: List
        Contains list of feature/column names.
    target_name : String
        Name of the Target column.
    mining_imp_val : tuple
        Contains the mining_attributes,mining_strategy, mining_impute_value
    categoric_values : tuple
        Contains Categorical attribute names and its values
    model_name : string
        Name of the model

    Returns
    -------
    segments : List
        The tree segment(s) followed by the closing regression-model
        segment.
    """
    class_count = model.n_classes_
    trees = [tree_info['tree_structure']
             for tree_info in model.booster_.dump_model()['tree_info']]
    segments = []

    if class_count == 2:
        first_schema = xgboostToPmml.mining_Field_For_First_Segment(
            feature_names)
        value_field = pml.OutputField(name="lgbValue",
                                      optype=OPTYPE.CONTINUOUS,
                                      dataType=DATATYPE.DOUBLE,
                                      feature=RESULT_FEATURE.PREDICTED_VALUE,
                                      isFinalResult="false")
        output = pml.Output(OutputField=[value_field])
        output_names = ["lgbValue"]
        tree_segments = generate_Segments_Equal_To_Estimators(
            trees, derived_col_names, feature_names)
        segments.append(
            xgboostToPmml.add_segmentation(model, tree_segments, first_schema,
                                           output, 1))
        normalization = REGRESSION_NORMALIZATION_METHOD.LOGISTIC
        final_segment_id = 2
    else:
        output_names = []
        for class_index in range(class_count):
            # Every class_count-th tree, starting at class_index, belongs
            # to this class.
            class_trees = trees[class_index::class_count]
            first_schema = xgboostToPmml.mining_Field_For_First_Segment(
                feature_names)
            output_name = 'lgbValue(' + str(class_index) + ')'
            value_field = pml.OutputField(
                name=output_name,
                optype=OPTYPE.CONTINUOUS,
                feature=RESULT_FEATURE.PREDICTED_VALUE,
                dataType=DATATYPE.FLOAT,
                isFinalResult="true")
            output = pml.Output(OutputField=[value_field])
            output_names.append(output_name)
            tree_segments = generate_Segments_Equal_To_Estimators(
                class_trees, derived_col_names, feature_names)
            segments.append(
                xgboostToPmml.add_segmentation(model, tree_segments,
                                               first_schema, output,
                                               class_index))
        normalization = REGRESSION_NORMALIZATION_METHOD.SOFTMAX
        final_segment_id = class_count + 1

    # The same list of output names is passed for both name arguments of
    # get_regrs_models; only the first returned model is used.
    reg_model = sklToPmml.get_regrs_models(model, output_names, output_names,
                                           target_name, mining_imp_val,
                                           categoric_values, model_name)[0]
    reg_model.normalizationMethod = normalization
    segments.append(pml.Segment(True_=pml.True_(),
                                id=final_segment_id,
                                RegressionModel=reg_model))
    return segments