def __init__(self, pmml):
    """
    Load coefficients and intercepts from the PMML ``RegressionModel`` element.

    Parameters
    ----------
    pmml :
      Forwarded unchanged to ``PMMLBaseRegressor.__init__``
      (presumably a path or file-like PMML source -- confirm against the base class).

    Raises
    ------
    Exception
      If ``self.root`` has no ``RegressionModel`` child.
    """
    PMMLBaseRegressor.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)

    regression_model = self.root.find('RegressionModel')
    if regression_model is None:
      raise Exception('PMML model does not contain RegressionModel.')

    regression_tables = regression_model.findall('RegressionTable')

    # One coefficient row and one intercept per <RegressionTable>.
    self.coef_ = np.array(
      [_get_coefficients(self, entry) for entry in regression_tables]
    )
    self.intercept_ = np.array(
      [float(entry.get('intercept')) for entry in regression_tables]
    )

    # Both arrays have exactly one leading entry per table, so a single table
    # means both are unwrapped to drop that leading axis.
    if len(regression_tables) == 1:
      self.coef_ = self.coef_[0]
      self.intercept_ = self.intercept_[0]
示例#2
0
  def __init__(self, pmml):
    """
    Initialise the regressor and attach a ``ColumnTransformer`` that one-hot
    encodes categorical data fields and passes every other data field through.

    Parameters
    ----------
    pmml :
      Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

    Notes
    -----
    The transformer is stored on ``self.transformer``. It is configured by
    assigning attributes and calling private scikit-learn validation hooks
    directly rather than by calling ``fit``.
    NOTE(review): these private hooks are version-dependent -- confirm against
    the scikit-learn version in use.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    target_name = self.target_field.get('name')

    # Every <DataField> except the target, in the iteration order of self.fields.
    data_fields = [
      candidate
      for key, candidate in self.fields.items()
      if key != target_name and candidate.tag == 'DataField'
    ]

    def step_for(data_field):
      """Return the (name, transformer, columns) triple for one data field."""
      field_name = data_field.get('name')

      if data_field.get('optype') == 'categorical':
        step = OneHotEncoder()
        # Categories come from the field mapping instead of being learned.
        step.categories_ = np.array([self.field_mapping[field_name][1].categories])
        step._legacy_mode = False
      else:
        step = 'passthrough'

      return (field_name, step, [self.field_mapping[field_name][0]])

    column_transformer = ColumnTransformer(
      transformers=[step_for(data_field) for data_field in data_fields]
    )

    # Single dummy row with one zero per data field, used only for validation.
    placeholder = np.array([[0] * len(data_fields)])
    column_transformer._validate_transformers()
    column_transformer._validate_column_callables(placeholder)
    column_transformer._validate_remainder(placeholder)
    column_transformer.transformers_ = column_transformer.transformers
    column_transformer.sparse_output_ = False
    column_transformer._feature_names_in = None

    self.transformer = column_transformer
示例#3
0
    def __init__(self, pmml):
        """
        Build ``self.tree_`` from the PMML ``TreeModel`` element.

        Parameters
        ----------
        pmml :
            Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

        Raises
        ------
        Exception
            If ``self.root`` has no ``TreeModel`` child.
        """
        PMMLBaseRegressor.__init__(self, pmml)

        tree_model = self.root.find('TreeModel')
        if tree_model is None:
            raise Exception('PMML model does not contain TreeModel.')

        self.n_outputs_ = 1
        n_classes = np.array([1] * self.n_outputs_, dtype=np.intp)

        def empty_tree(n_features):
            """Create an unpopulated Tree for the given feature count."""
            return Tree(n_features, n_classes, self.n_outputs_,
                        np.array([], dtype=np.int32))

        # Fall back to the ``n_features_`` attribute when the first attempt
        # raises AttributeError.
        try:
            self.tree_ = empty_tree(self.n_features_in_)
        except AttributeError:
            self.tree_ = empty_tree(self.n_features_)

        # Anything other than a binary split tree is passed through
        # ``unflatten`` before parsing.
        root_node = tree_model.find('Node')
        if tree_model.get('splitCharacteristic') != 'binarySplit':
            root_node = unflatten(root_node)

        nodes, values = construct_tree(root_node,
                                       None,
                                       self.field_mapping,
                                       rescale_factor=0.1)

        node_array = np.ascontiguousarray(nodes, dtype=NODE_DTYPE)
        self.tree_.__setstate__({
            'max_depth': (2**31) - 1,
            'node_count': node_array.shape[0],
            'nodes': node_array,
            'values': np.ascontiguousarray(values),
        })

        # Category counts are read only now, because parsing the tree may have
        # inferred additional categories.
        target_name = self.target_field.get('name')

        def category_count(data_field):
            """Number of categories for a categorical field, otherwise -1."""
            if data_field.get('optype') != 'categorical':
                return -1
            return len(self.field_mapping[data_field.get('name')][1].categories)

        n_categories = np.asarray(
            [
                category_count(candidate)
                for key, candidate in self.fields.items()
                if key != target_name and candidate.tag == 'DataField'
            ],
            dtype=np.int32,
            order='C',
        )

        self.tree_.set_n_categories(n_categories)
示例#4
0
    def __init__(self, pmml):
        """
        Load coefficients and intercept from the PMML ``GeneralRegressionModel``.

        Parameters
        ----------
        pmml :
            Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

        Raises
        ------
        Exception
            If ``self.root`` has no ``GeneralRegressionModel`` child.
        """
        PMMLBaseRegressor.__init__(self, pmml)
        OneHotEncodingMixin.__init__(self)

        general_regression = self.root.find('GeneralRegressionModel')
        if general_regression is None:
            message = 'PMML model does not contain GeneralRegressionModel.'
            raise Exception(message)

        coefficients = _get_coefficients(self, general_regression)
        self.coef_ = np.array(coefficients)
        self.intercept_ = _get_intercept(general_regression)
 def fit(self, x, y):
   """
   Delegate to ``PMMLBaseRegressor.fit`` with the same arguments and return
   whatever it returns.

   NOTE(review): the explicit base-class call presumably selects this ``fit``
   over one inherited from another parent class -- confirm against the class
   definition.
   """
   return PMMLBaseRegressor.fit(self, x, y)
示例#6
0
    def __init__(self, pmml, n_jobs=None):
        """
        Build a forest of trees from the segments of a PMML ``MiningModel``.

        Parameters
        ----------
        pmml :
            Forwarded unchanged to ``PMMLBaseRegressor.__init__``.
        n_jobs : optional
            Forwarded to ``RandomForestRegressor.__init__``.

        Raises
        ------
        Exception
            If the ``MiningModel`` or its ``Segmentation`` is missing, or if
            ``multipleModelMethod`` is neither ``majorityVote`` nor ``average``.

        Warns
        -----
        Emits a warning with the number of segments skipped because they have
        no ``True`` predicate child.
        """
        PMMLBaseRegressor.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        combine_method = segmentation.get('multipleModelMethod')
        if combine_method not in ('majorityVote', 'average'):
            raise Exception(
                'PMML model ensemble should use majority vote or average.')

        # Only segments guarded by a <True/> predicate are used.
        all_segments = segmentation.findall('Segment')
        usable_segments = [
            candidate for candidate in all_segments
            if candidate.find('True') is not None
        ]

        ignored = len(all_segments) - len(usable_segments)
        if ignored > 0:
            warnings.warn(
                'Warning: {} segment(s) ignored because of unsupported predicate.'
                .format(ignored))

        self.n_outputs_ = 1
        RandomForestRegressor.__init__(self,
                                       n_estimators=len(usable_segments),
                                       n_jobs=n_jobs)
        self._validate_estimator()

        # Template estimator carrying the feature/output counts of this model.
        template = self._make_estimator(append=False, random_state=123)
        try:
            template.n_features_in_ = self.n_features_in_
        except AttributeError:
            template.n_features_ = self.n_features_
        template.n_outputs_ = self.n_outputs_
        self.template_estimator = template

        self.estimators_ = [
            get_tree(self, segment, rescale_factor=0.1)
            for segment in usable_segments
        ]

        # Category counts are read only now, because parsing the trees may
        # have inferred additional categories.
        target_name = self.target_field.get('name')
        data_fields = [
            candidate for key, candidate in self.fields.items()
            if key != target_name and candidate.tag == 'DataField'
        ]

        def category_counts():
            """Fresh int32 array: category count per data field, -1 if not categorical."""
            return np.asarray(
                [
                    len(self.field_mapping[data_field.get('name')][1].categories)
                    if data_field.get('optype') == 'categorical' else -1
                    for data_field in data_fields
                ],
                dtype=np.int32,
                order='C',
            )

        for estimator in self.estimators_:
            counts = category_counts()
            estimator.n_categories = counts
            estimator.tree_.set_n_categories(counts)

        self.categorical = [
            count != -1 for count in self.estimators_[0].n_categories
        ]
示例#7
0
  def __init__(self, pmml, n_jobs=None):
    """
    Initialise the three base classes in order, then fit the nearest-neighbour
    regressor on the data held in ``self._X`` and ``self._y``.

    Parameters
    ----------
    pmml :
      Forwarded unchanged to ``PMMLBaseRegressor.__init__``.
    n_jobs : optional
      Forwarded to ``KNeighborsRegressor.__init__``.
    """
    PMMLBaseRegressor.__init__(self, pmml)
    KNeighborsRegressor.__init__(self, n_jobs=n_jobs)
    PMMLBaseKNN.__init__(self)

    # self._X and self._y are not assigned in this method; presumably
    # PMMLBaseKNN.__init__ populates them from the PMML document -- confirm.
    KNeighborsRegressor.fit(self, self._X, self._y)
示例#8
0
 def __init__(self, pmml):
     """
     Initialise the four base classes in a fixed order.

     Parameters
     ----------
     pmml :
       Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

     NOTE(review): the call order is presumably significant (``SVR.__init__``
     runs before ``PMMLBaseSVM.__init__``, which likely overwrites attributes
     with values from the PMML document) -- confirm before reordering.
     """
     PMMLBaseRegressor.__init__(self, pmml)
     OneHotEncodingMixin.__init__(self)
     SVR.__init__(self)
     PMMLBaseSVM.__init__(self)
示例#9
0
    def __init__(self, pmml):
        """
        Build a gradient boosting ensemble from the segments of a PMML
        ``MiningModel``.

        Parameters
        ----------
        pmml :
            Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

        Raises
        ------
        Exception
            If the ``MiningModel`` or its ``Segmentation`` is missing, or if
            ``multipleModelMethod`` is not ``sum``.

        Notes
        -----
        ``rescaleConstant`` and ``rescaleFactor`` are read from
        ``Targets/Target`` and converted to ``float``. When that element is
        absent, the values 0 and 1 are used, which are the defaults the
        attribute lookups already applied for a missing attribute.
        """
        PMMLBaseRegressor.__init__(self, pmml)

        mining_model = self.root.find('MiningModel')
        if mining_model is None:
            raise Exception('PMML model does not contain MiningModel.')

        segmentation = mining_model.find('Segmentation')
        if segmentation is None:
            raise Exception('PMML model does not contain Segmentation.')

        if segmentation.get('multipleModelMethod') not in ['sum']:
            raise Exception('PMML model ensemble should use sum.')

        # Only segments with a <True/> predicate that contain a TreeModel are used.
        segments = segmentation.findall('Segment')
        valid_segments = [
            segment for segment in segments
            if segment.find('True') is not None
            and segment.find('TreeModel') is not None
        ]

        n_estimators = len(valid_segments)
        self.n_outputs_ = 1
        GradientBoostingRegressor.__init__(self, n_estimators=n_estimators)

        # Template estimator carrying the feature/output counts of this model.
        clf = DecisionTreeRegressor(random_state=123)
        try:
            clf.n_features_in_ = self.n_features_in_
        except AttributeError:
            clf.n_features_ = self.n_features_
        clf.n_outputs_ = self.n_outputs_
        self.template_estimator = clf

        self._check_params()
        self._init_state()

        # Look the rescaling element up once. Attribute values arrive as
        # strings from the XML, so both are converted to float before use.
        targets = mining_model.find('Targets')
        target_element = targets.find('Target') if targets is not None else None
        if target_element is None:
            rescale_constant = 0.0
            rescale_factor = 1.0
        else:
            rescale_constant = float(target_element.get('rescaleConstant', 0))
            rescale_factor = float(target_element.get('rescaleFactor', 1))

        self.init_.constant_ = np.array([rescale_constant])
        self.init_.n_outputs_ = 1

        for x, y in np.ndindex(self.estimators_.shape):
            self.estimators_[x, y] = get_tree(self,
                                              valid_segments[x],
                                              rescale_factor=rescale_factor)

        # Category counts are read only now, because parsing the trees may
        # have inferred additional categories.
        target = self.target_field.get('name')
        n_categories = np.asarray(
            [
                len(self.field_mapping[field.get('name')][1].categories)
                if field.get('optype') == 'categorical' else -1
                for name, field in self.fields.items()
                if name != target and field.tag == 'DataField'
            ],
            dtype=np.int32,
            order='C',
        )

        for x, y in np.ndindex(self.estimators_.shape):
            estimator = self.estimators_[x, y]
            # Each estimator gets its own copy so no array is shared between trees.
            estimator_categories = n_categories.copy()
            estimator.n_categories = estimator_categories
            estimator.tree_.set_n_categories(estimator_categories)

        self.categorical = [
            count != -1 for count in self.estimators_[0, 0].n_categories
        ]