Exemplo n.º 1
0
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values, model_name):
    """
    Build the list of segments that represents an LGB classifier.

    For a two-class model the list holds one tree-ensemble segment followed
    by a regression segment (id 2) whose normalization method is LOGISTIC.
    For any other class count, one tree-ensemble segment is built per class
    from every ``n_classes_``-th tree of the booster dump, followed by a
    regression segment (id ``n_classes_ + 1``) whose normalization method is
    SOFTMAX.

    Parameters
    ----------
    model :
        LGB model object; ``n_classes_`` and ``booster_`` are read from it.
    derived_col_names : List
        Column names after preprocessing; forwarded to
        ``generate_Segments_Equal_To_Estimators``.
    feature_names: List
        Feature/column names; used for the mining schema of each tree
        segment and forwarded to ``generate_Segments_Equal_To_Estimators``.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains categorical attribute names and their values.
    model_name : string
        Name of the model.

    Returns
    -------
    segments : list
        The tree-ensemble segment(s) followed by the final regression
        segment.
    """
    two_class = model.n_classes_ == 2

    # One entry per boosted tree, in the order the booster dump reports them.
    tree_structures = [
        info['tree_structure']
        for info in model.booster_.dump_model()['tree_info']
    ]

    if two_class:
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        raw_score = pml.OutputField(name="lgbValue",
                                    optype=OPTYPE.CONTINUOUS,
                                    dataType=DATATYPE.DOUBLE,
                                    feature=RESULT_FEATURE.PREDICTED_VALUE,
                                    isFinalResult="false")
        output = pml.Output(OutputField=[raw_score])
        value_fields = ["lgbValue"]
        tree_segments = generate_Segments_Equal_To_Estimators(
            tree_structures, derived_col_names, feature_names)
        ensemble_segment = xgboostToPmml.add_segmentation(
            model, tree_segments, schema, output, 1)
        # The same field list is deliberately passed twice.
        link_model = sklToPmml.get_regrs_models(
            model, value_fields, value_fields, target_name, mining_imp_val,
            categoric_values, model_name)[0]
        link_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.LOGISTIC
        closing_segment = pml.Segment(True_=pml.True_(),
                                      id=2,
                                      RegressionModel=link_model)
        return [ensemble_segment, closing_segment]

    n_classes = model.n_classes_
    segments = []
    value_fields = []
    for class_idx in range(n_classes):
        # Every n_classes-th tree, starting at class_idx, feeds this class.
        class_trees = tree_structures[class_idx::n_classes]
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        field_name = 'lgbValue({})'.format(class_idx)
        class_score = pml.OutputField(name=field_name,
                                      optype=OPTYPE.CONTINUOUS,
                                      feature=RESULT_FEATURE.PREDICTED_VALUE,
                                      dataType=DATATYPE.FLOAT,
                                      isFinalResult="true")
        output = pml.Output(OutputField=[class_score])
        value_fields.append(field_name)
        tree_segments = generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(
            xgboostToPmml.add_segmentation(model, tree_segments, schema,
                                           output, class_idx))

    # The regression model consumes all per-class outputs collected above.
    link_model = sklToPmml.get_regrs_models(
        model, value_fields, value_fields, target_name, mining_imp_val,
        categoric_values, model_name)[0]
    link_model.normalizationMethod = REGRESSION_NORMALIZATION_METHOD.SOFTMAX
    segments.append(
        pml.Segment(True_=pml.True_(),
                    id=n_classes + 1,
                    RegressionModel=link_model))
    return segments
Exemplo n.º 2
0
def get_segments_for_xgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values, model_name):
    """
    Build the list of segments that represents an Xgboost classifier.

    For a two-class model the first ``n_estimators`` trees of the booster
    dump form one tree-ensemble segment, followed by a regression segment
    (id 2) whose normalization method is LOGISTIC.  For any other class
    count the first ``n_estimators * n_classes_`` trees are split so that
    every ``n_classes_``-th tree feeds the same class; one tree-ensemble
    segment is built per class, followed by a regression segment
    (id ``n_classes_ + 1``) whose normalization method is SOFTMAX.

    Parameters
    ----------
    model :
        Xgboost model object; ``n_classes_``, ``n_estimators`` and
        ``_Booster`` are read from it.
    derived_col_names : List
        Column names after preprocessing; forwarded to
        ``generate_Segments_Equal_To_Estimators``.
    feature_names: List
        Feature/column names; used for the mining schema of each tree
        segment and forwarded to ``generate_Segments_Equal_To_Estimators``.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains categorical attribute names and their values.
    model_name : string
        Name of the model.

    Returns
    -------
    segments : list
        The tree-ensemble segment(s) followed by the final regression
        segment.

    Raises
    ------
    IndexError
        If the booster dump holds fewer trees than the model attributes
        imply.
    """
    n_classes = model.n_classes_
    is_binary = n_classes == 2
    if is_binary:
        tree_count = model.n_estimators
    else:
        tree_count = model.n_estimators * n_classes

    # Dump the booster once.  Re-dumping inside the loop serialises every
    # tree again for each tree that is read, which is quadratic work.
    dumped_trees = model._Booster.get_dump(dump_format='json')
    # Index explicitly (rather than slice) so a dump that is shorter than
    # expected fails loudly instead of being silently truncated.
    trees = [json.loads(dumped_trees[i]) for i in range(tree_count)]

    if is_binary:
        schema = mining_Field_For_First_Segment(feature_names)
        raw_score = pml.OutputField(
            name="xgbValue",
            optype=OPTYPE.CONTINUOUS.value,
            dataType=DATATYPE.FLOAT.value,
            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
            isFinalResult="true")
        output = pml.Output(OutputField=[raw_score])
        value_fields = ['xgbValue']
        tree_segments = generate_Segments_Equal_To_Estimators(
            trees, derived_col_names, feature_names)
        ensemble_segment = add_segmentation(model, tree_segments, schema,
                                            output, 1)
        # The same field list is deliberately passed twice.
        link_model = sklToPmml.get_regrs_models(
            model, value_fields, value_fields, target_name, mining_imp_val,
            categoric_values, model_name)[0]
        link_model.normalizationMethod = \
            REGRESSION_NORMALIZATION_METHOD.LOGISTIC.value
        closing_segment = pml.Segment(True_=pml.True_(),
                                      id=2,
                                      RegressionModel=link_model)
        return [ensemble_segment, closing_segment]

    segments = []
    value_fields = []
    for class_idx in range(n_classes):
        # Every n_classes-th tree, starting at class_idx, feeds this class.
        class_trees = trees[class_idx::n_classes]
        schema = mining_Field_For_First_Segment(feature_names)
        field_name = 'xgbValue(' + str(class_idx) + ')'
        class_score = pml.OutputField(
            name=field_name,
            optype=OPTYPE.CONTINUOUS.value,
            feature=RESULT_FEATURE.PREDICTED_VALUE.value,
            dataType=DATATYPE.FLOAT.value,
            isFinalResult="true")
        output = pml.Output(OutputField=[class_score])
        value_fields.append(field_name)
        tree_segments = generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(
            add_segmentation(model, tree_segments, schema, output,
                             class_idx))

    # The regression model consumes all per-class outputs collected above.
    link_model = sklToPmml.get_regrs_models(
        model, value_fields, value_fields, target_name, mining_imp_val,
        categoric_values, model_name)[0]
    link_model.normalizationMethod = \
        REGRESSION_NORMALIZATION_METHOD.SOFTMAX.value
    segments.append(
        pml.Segment(True_=pml.True_(),
                    id=n_classes + 1,
                    RegressionModel=link_model))
    return segments
Exemplo n.º 3
0
def get_segments_for_xgbc(skl_model, derived_col_names, feature_names,
                          target_name, mining_imp_val, categoric_values):
    """
    Build the list of segments that represents an Xgboost classifier.

    For a two-class model the first ``n_estimators`` trees of the booster
    dump form one tree-ensemble segment, followed by a regression segment
    with id 2.  For any other class count the first
    ``n_estimators * n_classes_`` trees are split so that every
    ``n_classes_``-th tree feeds the same class; one tree-ensemble segment
    is built per class, followed by a regression segment with id
    ``n_classes_ + 1``.

    Parameters
    ----------
    skl_model :
        Xgboost model object; ``n_classes_``, ``n_estimators`` and
        ``_Booster`` are read from it.
    derived_col_names : List
        Column names after preprocessing; forwarded to
        ``generate_Segments_Equal_To_Estimators``.
    feature_names: List
        Feature/column names; used for the mining schema of each tree
        segment and forwarded to ``generate_Segments_Equal_To_Estimators``.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains categorical attribute names and their values.

    Returns
    -------
    segments : list
        The tree-ensemble segment(s) followed by the final regression
        segment.

    Raises
    ------
    IndexError
        If the booster dump holds fewer trees than the model attributes
        imply.
    """
    n_classes = skl_model.n_classes_
    is_binary = n_classes == 2
    if is_binary:
        tree_count = skl_model.n_estimators
    else:
        tree_count = skl_model.n_estimators * n_classes

    # Dump the booster once.  Re-dumping inside the loop serialises every
    # tree again for each tree that is read, which is quadratic work.
    dumped_trees = skl_model._Booster.get_dump(dump_format='json')
    # Index explicitly (rather than slice) so a dump that is shorter than
    # expected fails loudly instead of being silently truncated.
    nodes_as_json = [json.loads(dumped_trees[i]) for i in range(tree_count)]
    main_key_value = generate_main_Key_Value(nodes_as_json)

    if is_binary:
        schema = mining_Field_For_First_Segment(feature_names)
        raw_score = pml.OutputField(name="xgbValue",
                                    optype="continuous",
                                    dataType="float",
                                    feature="predictedValue",
                                    isFinalResult="true")
        output = pml.Output(OutputField=[raw_score])
        value_fields = ['xgbValue']
        tree_segments = generate_Segments_Equal_To_Estimators(
            main_key_value, derived_col_names, feature_names)
        ensemble_segment = add_segmentation(skl_model, tree_segments, schema,
                                            output, 1)
        # The same field list is deliberately passed twice.
        closing_segment = pml.Segment(
            True_=pml.True_(),
            id=2,
            RegressionModel=sklToPmml.get_regrs_models(
                skl_model, value_fields, value_fields, target_name,
                mining_imp_val, categoric_values)[0])
        return [ensemble_segment, closing_segment]

    segments = []
    value_fields = []
    for class_idx in range(n_classes):
        # Every n_classes-th tree, starting at class_idx, feeds this class.
        class_trees = main_key_value[class_idx::n_classes]
        schema = mining_Field_For_First_Segment(feature_names)
        field_name = 'xgbValue(' + str(class_idx) + ')'
        # dataType is stated explicitly so the per-class outputs are
        # declared the same way as the binary "xgbValue" output above.
        class_score = pml.OutputField(name=field_name,
                                      optype="continuous",
                                      feature="predictedValue",
                                      dataType="float",
                                      isFinalResult="true")
        output = pml.Output(OutputField=[class_score])
        value_fields.append(field_name)
        tree_segments = generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(
            add_segmentation(skl_model, tree_segments, schema, output,
                             class_idx))

    # The regression model consumes all per-class outputs collected above.
    segments.append(
        pml.Segment(True_=pml.True_(),
                    id=n_classes + 1,
                    RegressionModel=sklToPmml.get_regrs_models(
                        skl_model, value_fields, value_fields, target_name,
                        mining_imp_val, categoric_values)[0]))
    return segments
Exemplo n.º 4
0
def get_segments_for_lgbc(model, derived_col_names, feature_names, target_name,
                          mining_imp_val, categoric_values):
    """
    Build the list of segments that represents an LGB classifier.

    Every tree of the booster dump is first converted with
    ``generate_structure_for_lgb``.  For a two-class model the converted
    trees form one tree-ensemble segment, followed by a regression segment
    with id 2.  For any other class count every ``n_classes_``-th tree feeds
    the same class; one tree-ensemble segment is built per class, followed
    by a regression segment with id ``n_classes_ + 1``.

    Parameters
    ----------
    model :
        LGB model object; ``n_classes_`` and ``booster_`` are read from it.
    derived_col_names : List
        Column names after preprocessing; forwarded to
        ``generate_structure_for_lgb`` and to
        ``xgboostToPmml.generate_Segments_Equal_To_Estimators``.
    feature_names: List
        Feature/column names; used for the mining schema of each tree
        segment and forwarded to
        ``xgboostToPmml.generate_Segments_Equal_To_Estimators``.
    target_name : String
        Name of the target column.
    mining_imp_val : tuple
        Contains the mining_attributes, mining_strategy, mining_impute_value.
    categoric_values : tuple
        Contains categorical attribute names and their values.

    Returns
    -------
    segments : list
        The tree-ensemble segment(s) followed by the final regression
        segment.
    """
    is_binary = model.n_classes_ == 2

    # Each tree gets its own fresh node list as the accumulator argument.
    main_key_value = [
        generate_structure_for_lgb(info['tree_structure'], [],
                                   derived_col_names)
        for info in model.booster_.dump_model()['tree_info']
    ]

    if is_binary:
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        raw_score = pml.OutputField(name="lgbValue",
                                    optype="continuous",
                                    dataType="float",
                                    feature="predictedValue",
                                    isFinalResult="true")
        output = pml.Output(OutputField=[raw_score])
        value_fields = ['lgbValue']
        tree_segments = xgboostToPmml.generate_Segments_Equal_To_Estimators(
            main_key_value, derived_col_names, feature_names)
        ensemble_segment = xgboostToPmml.add_segmentation(
            model, tree_segments, schema, output, 1)
        # The same field list is deliberately passed twice.
        closing_segment = pml.Segment(
            True_=pml.True_(),
            id=2,
            RegressionModel=sklToPmml.get_regrs_models(
                model, value_fields, value_fields, target_name,
                mining_imp_val, categoric_values)[0])
        return [ensemble_segment, closing_segment]

    n_classes = model.n_classes_
    segments = []
    value_fields = []
    for class_idx in range(n_classes):
        # Every n_classes-th tree, starting at class_idx, feeds this class.
        class_trees = main_key_value[class_idx::n_classes]
        schema = xgboostToPmml.mining_Field_For_First_Segment(feature_names)
        field_name = 'lgbValue(' + str(class_idx) + ')'
        # dataType is stated explicitly so the per-class outputs are
        # declared the same way as the binary "lgbValue" output above.
        class_score = pml.OutputField(name=field_name,
                                      optype="continuous",
                                      feature="predictedValue",
                                      dataType="float",
                                      isFinalResult="true")
        output = pml.Output(OutputField=[class_score])
        value_fields.append(field_name)
        tree_segments = xgboostToPmml.generate_Segments_Equal_To_Estimators(
            class_trees, derived_col_names, feature_names)
        segments.append(
            xgboostToPmml.add_segmentation(model, tree_segments, schema,
                                           output, class_idx))

    # The regression model consumes all per-class outputs collected above.
    segments.append(
        pml.Segment(True_=pml.True_(),
                    id=n_classes + 1,
                    RegressionModel=sklToPmml.get_regrs_models(
                        model, value_fields, value_fields, target_name,
                        mining_imp_val, categoric_values)[0]))
    return segments