def __init__(self, pmml):
    """
    Load coefficients and intercepts from a PMML ``RegressionModel``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

    Raises
    ------
    Exception
        If the parsed document has no ``RegressionModel`` element.

    Notes
    -----
    ``coef_`` holds one row per ``RegressionTable`` and ``intercept_`` one
    value per table. When there is exactly one table, both are unwrapped to
    that single row / single value.
    """
    PMMLBaseRegressor.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)

    regression_model = self.root.find('RegressionModel')
    if regression_model is None:
        raise Exception('PMML model does not contain RegressionModel.')

    regression_tables = regression_model.findall('RegressionTable')

    # All coefficient rows are gathered before any intercept is parsed.
    coefficients = np.array([
        _get_coefficients(self, regression_table)
        for regression_table in regression_tables
    ])
    intercepts = np.array([
        float(regression_table.get('intercept'))
        for regression_table in regression_tables
    ])

    # A single table is exposed without the leading "per table" axis.
    self.coef_ = coefficients[0] if coefficients.shape[0] == 1 else coefficients
    self.intercept_ = intercepts[0] if intercepts.shape[0] == 1 else intercepts
def __init__(self, pmml):
    """
    Build ``self.transformer``, a ``ColumnTransformer`` over the input fields.

    Every non-target ``DataField`` gets one transformer entry: categorical
    fields get a ``OneHotEncoder`` whose ``categories_`` come from
    ``self.field_mapping``; all other fields are passed through.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    target_name = self.target_field.get('name')
    data_fields = [
        candidate
        for field_name, candidate in self.fields.items()
        if field_name != target_name and candidate.tag == 'DataField'
    ]

    def encoder_for(field):
        # Categorical fields get an encoder whose categories are assigned
        # directly instead of being learned from data.
        if field.get('optype') == 'categorical':
            one_hot = OneHotEncoder()
            one_hot.categories_ = np.array(
                [self.field_mapping[field.get('name')][1].categories]
            )
            one_hot._legacy_mode = False
            return one_hot
        return 'passthrough'

    # (name, transformer, [column]) triples; the column comes from the first
    # element of the field's entry in self.field_mapping.
    column_specs = []
    for data_field in data_fields:
        field_name = data_field.get('name')
        column_specs.append((
            field_name,
            encoder_for(data_field),
            [self.field_mapping[field_name][0]],
        ))

    column_transformer = ColumnTransformer(transformers=column_specs)

    # A single all-zero row with one column per data field, used only as the
    # argument of the private validation helpers below.
    placeholder_row = np.array([[0] * len(data_fields)])

    # NOTE(review): the private calls and attribute assignments below
    # presumably make the transformer usable without calling fit(); they rely
    # on scikit-learn internals - confirm against the supported versions.
    column_transformer._validate_transformers()
    column_transformer._validate_column_callables(placeholder_row)
    column_transformer._validate_remainder(placeholder_row)
    column_transformer.transformers_ = column_transformer.transformers
    column_transformer.sparse_output_ = False
    column_transformer._feature_names_in = None

    self.transformer = column_transformer
def __init__(self, pmml):
    """
    Build a regression tree from the ``TreeModel`` element of a PMML document.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

    Raises
    ------
    Exception
        If the parsed document has no ``TreeModel`` element.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    tree_model = self.root.find('TreeModel')
    if tree_model is None:
        raise Exception('PMML model does not contain TreeModel.')

    # Parse tree
    self.n_outputs_ = 1
    n_classes = np.array([1] * self.n_outputs_, dtype=np.intp)

    # Only the attribute lookup is guarded: an AttributeError raised inside
    # the Tree constructor itself must propagate instead of triggering a
    # second construction attempt.
    try:
        n_features = self.n_features_in_
    except AttributeError:
        n_features = self.n_features_
    self.tree_ = Tree(n_features, n_classes, self.n_outputs_,
                      np.array([], dtype=np.int32))

    # Anything other than a declared binary split is passed through
    # unflatten() before the tree is constructed.
    first_node = tree_model.find('Node')
    if tree_model.get('splitCharacteristic') != 'binarySplit':
        first_node = unflatten(first_node)

    nodes, values = construct_tree(first_node, None, self.field_mapping,
                                   rescale_factor=0.1)

    node_ndarray = np.ascontiguousarray(nodes, dtype=NODE_DTYPE)
    value_ndarray = np.ascontiguousarray(values)

    # No depth is computed here, so the state always carries 2**31 - 1 as
    # its 'max_depth' value.
    self.tree_.__setstate__({
        'max_depth': (2 ** 31) - 1,
        'node_count': node_ndarray.shape[0],
        'nodes': node_ndarray,
        'values': value_ndarray,
    })

    # Required after constructing trees, because categories may be inferred in
    # the parsing process
    target = self.target_field.get('name')
    fields = [field for name, field in self.fields.items() if name != target]

    # Number of categories for categorical data fields, -1 for all others.
    n_categories = np.asarray([
        len(self.field_mapping[field.get('name')][1].categories)
        if field.get('optype') == 'categorical' else -1
        for field in fields
        if field.tag == 'DataField'
    ], dtype=np.int32, order='C')

    self.tree_.set_n_categories(n_categories)
def __init__(self, pmml):
    """
    Load ``coef_`` and ``intercept_`` from a PMML ``GeneralRegressionModel``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

    Raises
    ------
    Exception
        If the parsed document has no ``GeneralRegressionModel`` element.
    """
    PMMLBaseRegressor.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)

    general_regression = self.root.find('GeneralRegressionModel')
    if general_regression is None:
        raise Exception('PMML model does not contain GeneralRegressionModel.')

    # Coefficients are read before the intercept.
    coefficients = _get_coefficients(self, general_regression)
    self.coef_ = np.array(coefficients)
    self.intercept_ = _get_intercept(general_regression)
def fit(self, x, y):
    """
    Delegate to ``PMMLBaseRegressor.fit`` and return its result.

    The base implementation is called explicitly by class name rather than
    being resolved through the method resolution order.

    NOTE(review): presumably this keeps another base class's ``fit`` from
    being picked up instead - confirm against the class's bases.
    """
    return PMMLBaseRegressor.fit(self, x, y)
def __init__(self, pmml, n_jobs=None):
    """
    Build a random forest regressor from a PMML ``MiningModel``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.
    n_jobs
        Forwarded unchanged to ``RandomForestRegressor.__init__``.

    Raises
    ------
    Exception
        If the document has no ``MiningModel``, the model has no
        ``Segmentation``, or the segmentation's ``multipleModelMethod`` is
        neither ``majorityVote`` nor ``average``.

    Warns
    -----
    Emits a warning (via ``warnings.warn``) when segments without a ``True``
    predicate are skipped.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    mining_model = self.root.find('MiningModel')
    if mining_model is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation = mining_model.find('Segmentation')
    if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

    if segmentation.get('multipleModelMethod') not in ['majorityVote', 'average']:
        raise Exception(
            'PMML model ensemble should use majority vote or average.')

    # Parse segments
    segments = segmentation.findall('Segment')
    valid_segments = [
        segment for segment in segments if segment.find('True') is not None
    ]

    if len(valid_segments) < len(segments):
        warnings.warn(
            'Warning: {} segment(s) ignored because of unsupported predicate.'
            .format(len(segments) - len(valid_segments)))

    n_estimators = len(valid_segments)
    self.n_outputs_ = 1
    RandomForestRegressor.__init__(self, n_estimators=n_estimators,
                                   n_jobs=n_jobs)
    self._validate_estimator()

    # Template estimator carrying the feature/output counts of this model.
    clf = self._make_estimator(append=False, random_state=123)
    try:
        clf.n_features_in_ = self.n_features_in_
    except AttributeError:
        clf.n_features_ = self.n_features_
    clf.n_outputs_ = self.n_outputs_
    self.template_estimator = clf

    self.estimators_ = [
        get_tree(self, segment, rescale_factor=0.1)
        for segment in valid_segments
    ]

    # Required after constructing trees, because categories may be inferred in
    # the parsing process
    target = self.target_field.get('name')
    fields = [field for name, field in self.fields.items() if name != target]

    # Number of categories for categorical data fields, -1 for all others.
    # The values are identical for every estimator, so they are computed once.
    n_categories = np.asarray([
        len(self.field_mapping[field.get('name')][1].categories)
        if field.get('optype') == 'categorical' else -1
        for field in fields
        if field.tag == 'DataField'
    ], dtype=np.int32, order='C')

    for estimator in self.estimators_:
        # Each estimator keeps its own array object.
        estimator.n_categories = n_categories.copy()
        estimator.tree_.set_n_categories(estimator.n_categories)

    self.categorical = [x != -1 for x in n_categories]
def __init__(self, pmml, n_jobs=None):
    """
    Initialise the base classes in a fixed order, then fit the regressor.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.
    n_jobs
        Forwarded unchanged to ``KNeighborsRegressor.__init__``.
    """
    PMMLBaseRegressor.__init__(self, pmml)
    KNeighborsRegressor.__init__(self, n_jobs=n_jobs)
    PMMLBaseKNN.__init__(self)

    # NOTE(review): self._X and self._y are not assigned in this method;
    # presumably one of the initialisers above provides them - confirm.
    KNeighborsRegressor.fit(self, self._X, self._y)
def __init__(self, pmml):
    """
    Initialise the four base classes in a fixed order.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``; the other
        initialisers are called without extra arguments.
    """
    # NOTE(review): the call order is presumably significant (later
    # initialisers may read state set by earlier ones) - do not reorder
    # without checking the base classes.
    PMMLBaseRegressor.__init__(self, pmml)
    OneHotEncodingMixin.__init__(self)
    SVR.__init__(self)
    PMMLBaseSVM.__init__(self)
def __init__(self, pmml):
    """
    Build a gradient boosting regressor from a PMML ``MiningModel``.

    Parameters
    ----------
    pmml
        Forwarded unchanged to ``PMMLBaseRegressor.__init__``.

    Raises
    ------
    Exception
        If the document has no ``MiningModel``, the model has no
        ``Segmentation``, or the segmentation's ``multipleModelMethod`` is
        not ``sum``.
    ValueError
        If the target's ``rescaleConstant`` or ``rescaleFactor`` attribute
        cannot be converted to ``float``.
    """
    PMMLBaseRegressor.__init__(self, pmml)

    mining_model = self.root.find('MiningModel')
    if mining_model is None:
        raise Exception('PMML model does not contain MiningModel.')

    segmentation = mining_model.find('Segmentation')
    if segmentation is None:
        raise Exception('PMML model does not contain Segmentation.')

    if segmentation.get('multipleModelMethod') not in ['sum']:
        raise Exception('PMML model ensemble should use sum.')

    # Parse segments: only segments with a True predicate and a TreeModel
    # are used.
    segments = segmentation.findall('Segment')
    valid_segments = [
        segment for segment in segments
        if segment.find('True') is not None
        and segment.find('TreeModel') is not None
    ]

    n_estimators = len(valid_segments)
    self.n_outputs_ = 1
    GradientBoostingRegressor.__init__(self, n_estimators=n_estimators)

    # Template estimator carrying the feature/output counts of this model.
    clf = DecisionTreeRegressor(random_state=123)
    try:
        clf.n_features_in_ = self.n_features_in_
    except AttributeError:
        clf.n_features_ = self.n_features_
    clf.n_outputs_ = self.n_outputs_
    self.template_estimator = clf

    # NOTE(review): these private GradientBoostingRegressor helpers are
    # expected to create self.init_ and self.estimators_, both used below -
    # confirm against the supported scikit-learn versions.
    self._check_params()
    self._init_state()

    # Attribute values read from the XML element are strings, so both
    # rescaling parameters are converted to float once, up front.
    target_element = mining_model.find('Targets').find('Target')
    mean = float(target_element.get('rescaleConstant', 0))
    factor = float(target_element.get('rescaleFactor', 1))

    self.init_.constant_ = np.array([mean])
    self.init_.n_outputs_ = 1

    for x, y in np.ndindex(self.estimators_.shape):
        self.estimators_[x, y] = get_tree(self, valid_segments[x],
                                          rescale_factor=factor)

    # Required after constructing trees, because categories may be inferred in
    # the parsing process
    target = self.target_field.get('name')
    fields = [field for name, field in self.fields.items() if name != target]

    # Number of categories for categorical data fields, -1 for all others.
    # The values are identical for every estimator, so they are computed once.
    n_categories = np.asarray([
        len(self.field_mapping[field.get('name')][1].categories)
        if field.get('optype') == 'categorical' else -1
        for field in fields
        if field.tag == 'DataField'
    ], dtype=np.int32, order='C')

    for x, y in np.ndindex(self.estimators_.shape):
        estimator = self.estimators_[x, y]
        # Each estimator keeps its own array object.
        estimator.n_categories = n_categories.copy()
        estimator.tree_.set_n_categories(estimator.n_categories)

    self.categorical = [x != -1 for x in n_categories]