def setUp(self):
    """Create a default DecisionTreeRegressor and a regression-mode converter for it."""
    self.model = DecisionTreeRegressor()
    # init_data() is defined outside this snippet; the converter below reads
    # self.ctx, which is presumably populated there -- TODO confirm.
    self.init_data()
    regression_mode = DecisionTreeConverter.MODE_REGRESSION
    self.converter = DecisionTreeConverter(
        estimator=self.model,
        context=self.ctx,
        mode=regression_mode,
    )
class TestDecisionTreeRegressorConverter(TestCase):
    """Checks the PMML generated for a small fitted DecisionTreeRegressor."""

    def setUp(self):
        """Fit a depth-2 regressor on four samples and wrap it in a converter."""
        np.random.seed(1)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        targets = [0, 1, 1, 1]
        self.est = DecisionTreeRegressor(max_depth=2)
        self.est.fit(samples, targets)

        # Input and model schemas list the same two features, but each schema
        # gets its own freshly constructed feature objects.
        input_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        model_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        self.ctx = TransformationContext(
            input=input_features,
            model=model_features,
            derived=[],
            output=[IntegerNumericFeature('output')],
        )
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=DecisionTreeConverter.MODE_REGRESSION,
        )

    def test_transform(self):
        """The first TreeModel has a mining schema with 3 fields and a True_ root node."""
        document = self.converter.pmml()
        tree_model = document.TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        assert len(mining_schema.MiningField) == 3, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'
def setUp(self):
    """Fit a depth-2 regressor on four samples and build a regression converter."""
    np.random.seed(1)
    samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
    targets = [0, 1, 1, 1]
    self.est = DecisionTreeRegressor(max_depth=2)
    self.est.fit(samples, targets)

    # Each schema gets its own feature instances, even where the
    # definitions are identical.
    input_features = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    model_features = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    schemas = {
        Schema.INPUT: input_features,
        Schema.MODEL: model_features,
        Schema.DERIVED: [],
        Schema.OUTPUT: [IntegerNumericFeature('output')],
    }
    self.ctx = TransformationContext(schemas)
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=ModelMode.REGRESSION,
    )
def segmentation(self):
    """
    Build a segmentation (sequence of estimators)

    The first segment wraps the initial estimator (``estimator.init_``) with
    weight 1; every following segment wraps one tree from the first column of
    ``estimator.estimators_`` with weight ``estimator.learning_rate``. All
    nested models are converted against a regression context whose output is
    a continuous numeric feature named after the original output feature.

    :return: Segmentation element
    """
    # there is no notion of weighted sum, so we should take weighted average
    # and multiply result by total weight in output transformation
    segmentation = pmml.Segmentation(multipleModelMethod="weightedAverage")

    # build the context for the nested regression models by replacing output
    # categorical feature with the continuous numeric feature
    regression_context = copy(self.context)
    # copy() is presumably a shallow copy (e.g. copy.copy), in which case the
    # copied context still shares its ``schemas`` dict with self.context.
    # Give the copy its own dict so that swapping the output schema below
    # does not silently overwrite the output schema of self.context as well.
    regression_context.schemas = dict(self.context.schemas)
    regression_context.schemas[Schema.OUTPUT] = [
        RealNumericFeature(
            name=self.context.schemas[Schema.OUTPUT][0].name,
            namespace=Schema.NUMERIC.value
        )
    ]

    # first, transform initial estimator
    init_segment = pmml.Segment(weight=1)
    init_segment.append(pmml.True_())
    init_converter = find_converter(self.estimator.init_)(self.estimator.init_, regression_context)
    init_segment.append(init_converter.model())
    segmentation.append(init_segment)

    # then one segment per boosting stage, weighted by the learning rate
    for est in self.estimator.estimators_[:, 0]:
        s = pmml.Segment(weight=self.estimator.learning_rate)
        s.append(pmml.True_())
        s.append(DecisionTreeConverter(est, regression_context, ModelMode.REGRESSION)._model())
        segmentation.append(s)

    return segmentation
def setUp(self):
    """Create a default DecisionTreeClassifier and a classification-mode converter for it."""
    self.model = DecisionTreeClassifier()
    # init_data() is defined outside this snippet; the converter below reads
    # self.ctx, which is presumably populated there -- TODO confirm.
    self.init_data()
    classification_mode = DecisionTreeConverter.MODE_CLASSIFICATION
    self.converter = DecisionTreeConverter(
        estimator=self.model,
        context=self.ctx,
        mode=classification_mode,
    )
def segmentation(self):
    """
    Assemble the weighted-average segmentation for the boosted ensemble.

    The first segment holds the converted initial estimator with weight 1;
    each subsequent segment holds one regression tree from the first column
    of ``estimator.estimators_`` with weight ``estimator.learning_rate``.

    :return: Segmentation element
    """
    # PMML has no weighted sum, so a weighted average is emitted here and the
    # result is multiplied by the total weight in the output transformation.
    ensemble = pmml.Segmentation(multipleModelMethod="weightedAverage")

    # Segment for the initial estimator.
    first_segment = pmml.Segment(weight=1)
    first_segment.append(pmml.True_())
    initial = self.estimator.init_
    initial_converter = find_converter(initial)(initial, self.context)
    first_segment.append(initial_converter.model())
    ensemble.append(first_segment)

    # One segment per boosting stage.
    for tree in self.estimator.estimators_[:, 0]:
        stage_segment = pmml.Segment(weight=self.estimator.learning_rate)
        stage_segment.append(pmml.True_())
        tree_converter = DecisionTreeConverter(tree, self.context, self.MODE_REGRESSION)
        stage_segment.append(tree_converter._model())
        ensemble.append(stage_segment)

    return ensemble
class TestDecisionTreeRegressorConverter(TestCase):
    """Checks the PMML generated for a small fitted DecisionTreeRegressor."""

    def setUp(self):
        """Fit a depth-2 regressor on four samples and wrap it in a converter."""
        np.random.seed(1)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        targets = [0, 1, 1, 1]
        self.est = DecisionTreeRegressor(max_depth=2)
        self.est.fit(samples, targets)

        # Each schema gets its own feature instances, even where the
        # definitions are identical.
        input_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        model_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        schemas = {
            Schema.INPUT: input_features,
            Schema.MODEL: model_features,
            Schema.DERIVED: [],
            Schema.OUTPUT: [IntegerNumericFeature('output')],
        }
        self.ctx = TransformationContext(schemas)
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.REGRESSION,
        )

    def test_transform(self):
        """The first TreeModel has a mining schema with 2 fields and a True_ root node."""
        document = self.converter.pmml()
        tree_model = document.TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        assert len(mining_schema.MiningField) == 2, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'
def test_transform_with_derived_field(self):
    """Smoke test: a context with a MapValues-derived feature converts and serializes to XML."""
    self.est = DecisionTreeClassifier(max_depth=2)
    samples = [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]]
    labels = [0, 1, 1, 1]
    self.est.fit(samples, labels)

    # MapValues transformation that looks up "output" from the (x1, x2) pair.
    mapping = pmml.MapValues(dataType="double", outputColumn="output")
    for column_name in ("x1", "x2"):
        mapping.append(pmml.FieldColumnPair(column=column_name, field=column_name))
    table = pmml.InlineTable()
    for x1, x2, output in ((0, 'zero', 0), (0, 'one', 0), (1, 'zero', 0), (1, 'one', 1)):
        table.append(pmml_row(x1=x1, x2=x2, output=output))
    mapping.append(table)

    input_features = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    derived_features = [
        DerivedFeature(feature=RealNumericFeature(name='x3'), transformation=mapping),
    ]
    model_features = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
        RealNumericFeature(name='x3'),
    ]
    output_features = [IntegerCategoricalFeature('output', ['neg', 'pos'])]
    self.ctx = TransformationContext({
        Schema.INPUT: input_features,
        Schema.DERIVED: derived_features,
        Schema.MODEL: model_features,
        Schema.OUTPUT: output_features,
    })
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=ModelMode.CLASSIFICATION,
    )
    # No assertions: the test passes if conversion and serialization do not raise.
    self.converter.pmml().toxml()
def test_transform_with_derived_field(self):
    """Smoke test: a context with a derived feature converts and serializes to XML.

    The derived feature ``x3`` is described twice: as a PMML ``MapValues``
    transformation over ``(x1, x2)`` and as a vectorized Python function built
    from the same rows. There are no assertions; the test passes if
    ``pmml().toxml()`` does not raise.
    """
    self.est = DecisionTreeClassifier(max_depth=2)
    self.est.fit([
        [0, 0, 0],
        [0, 1, 0],
        [1, 0, 0],
        [1, 1, 1],
    ], [0, 1, 1, 1])

    mapping = pmml.MapValues(dataType="double", outputColumn="output")
    mapping.append(pmml.FieldColumnPair(column="x1", field="x1"))
    mapping.append(pmml.FieldColumnPair(column="x2", field="x2"))
    it = pmml.InlineTable()
    mapping_rows = [
        dict(x1=0, x2='zero', output=0),
        dict(x1=0, x2='one', output=0),
        dict(x1=1, x2='zero', output=0),
        dict(x1=1, x2='one', output=1),
    ]
    mapping_df = pd.DataFrame(mapping_rows)
    for _, line in mapping_df.iterrows():
        it.append(pmml_row(**dict(line)))
    mapping.append(it)

    # Python-side counterpart of the table above. The previous version called
    # mapping_df.set_index(...) without keeping the result (set_index returns
    # a new frame by default) and then indexed with the removed ``.ix``
    # accessor, so the lookup could never resolve an (x1, x2) pair. A plain
    # dict keyed by (x1, x2) gives the intended mapping without relying on
    # pandas indexing semantics.
    lookup = dict(((row['x1'], row['x2']), row['output']) for row in mapping_rows)
    mapping_f = np.vectorize(lambda x1, x2: lookup[(x1, x2)])

    self.ctx = TransformationContext({
        Schema.INPUT: [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one'])
        ],
        Schema.DERIVED: [
            DerivedFeature(
                feature=RealNumericFeature(name='x3'),
                transformation=mapping,
                function=mapping_f
            )
        ],
        Schema.MODEL: [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
            RealNumericFeature(name='x3')
        ],
        Schema.OUTPUT: [
            IntegerCategoricalFeature('output', ['neg', 'pos'])
        ]
    })
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=ModelMode.CLASSIFICATION
    )
    self.converter.pmml().toxml()
def setUp(self):
    """Fit a depth-2 regressor on four samples and build a regression converter."""
    np.random.seed(1)
    samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
    targets = [0, 1, 1, 1]
    self.est = DecisionTreeRegressor(max_depth=2)
    self.est.fit(samples, targets)

    # Input and model schemas list the same two features, but each schema
    # gets its own freshly constructed feature objects.
    input_features = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    model_features = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    self.ctx = TransformationContext(
        input=input_features,
        model=model_features,
        derived=[],
        output=[IntegerNumericFeature('output')],
    )
    regression_mode = DecisionTreeConverter.MODE_REGRESSION
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=regression_mode,
    )
def setUp(self):
    """Fit a depth-2 regressor on four samples and build a regression converter."""
    np.random.seed(1)
    training_x = [[0, 0], [0, 1], [1, 0], [1, 1]]
    training_y = [0, 1, 1, 1]
    self.est = DecisionTreeRegressor(max_depth=2)
    self.est.fit(training_x, training_y)

    # Each schema gets its own feature instances, even where the
    # definitions are identical.
    raw_inputs = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    model_inputs = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    self.ctx = TransformationContext({
        Schema.INPUT: raw_inputs,
        Schema.MODEL: model_inputs,
        Schema.DERIVED: [],
        Schema.OUTPUT: [IntegerNumericFeature('output')],
    })
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=ModelMode.REGRESSION,
    )
def segmentation(self):
    """
    Assemble a majority-vote segmentation with one segment per fitted tree.

    Each segment is identified by the tree's position in
    ``estimator.estimators_`` and contains that tree converted in
    classification mode.

    :return: Segmentation element
    """
    voting = pmml.Segmentation(multipleModelMethod="majorityVote")
    for position, tree in enumerate(self.estimator.estimators_):
        segment = pmml.Segment(id=position)
        segment.append(pmml.True_())
        tree_converter = DecisionTreeConverter(tree, self.context, self.MODE_CLASSIFICATION)
        segment.append(tree_converter._model())
        voting.append(segment)
    return voting
def test_transform_with_derived_field(self):
    """Smoke test: a context with a MapValues-derived feature converts and serializes to XML."""
    self.est = DecisionTreeClassifier(max_depth=2)
    train_x = [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]]
    train_y = [0, 1, 1, 1]
    self.est.fit(train_x, train_y)

    # MapValues transformation that looks up "output" from the (x1, x2) pair.
    mapping = pmml.MapValues(dataType="double", outputColumn="output")
    for field_name in ("x1", "x2"):
        mapping.append(pmml.FieldColumnPair(column=field_name, field=field_name))
    inline_table = pmml.InlineTable()
    table_rows = (
        (0, 'zero', 0),
        (0, 'one', 0),
        (1, 'zero', 0),
        (1, 'one', 1),
    )
    for x1, x2, output in table_rows:
        inline_table.append(pmml_row(x1=x1, x2=x2, output=output))
    mapping.append(inline_table)

    input_schema = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
    ]
    derived_schema = [
        DerivedFeature(
            feature=RealNumericFeature(name='x3'),
            transformation=mapping,
        ),
    ]
    model_schema = [
        IntegerNumericFeature('x1'),
        StringCategoricalFeature('x2', ['zero', 'one']),
        RealNumericFeature(name='x3'),
    ]
    output_schema = [IntegerCategoricalFeature('output', ['neg', 'pos'])]
    self.ctx = TransformationContext({
        Schema.INPUT: input_schema,
        Schema.DERIVED: derived_schema,
        Schema.MODEL: model_schema,
        Schema.OUTPUT: output_schema,
    })
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=ModelMode.CLASSIFICATION,
    )
    # No assertions: the test passes if conversion and serialization do not raise.
    self.converter.pmml().toxml()
def setUp(self):
    """Fit a depth-2 regressor on four samples and build a regression converter."""
    np.random.seed(1)
    features = [[0, 0], [0, 1], [1, 0], [1, 1]]
    responses = [0, 1, 1, 1]
    self.est = DecisionTreeRegressor(max_depth=2)
    self.est.fit(features, responses)

    # Each schema gets its own feature instances, even where the
    # definitions are identical.
    context_kwargs = dict(
        input=[
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ],
        model=[
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ],
        derived=[],
        output=[IntegerNumericFeature('output')],
    )
    self.ctx = TransformationContext(**context_kwargs)
    self.converter = DecisionTreeConverter(
        estimator=self.est,
        context=self.ctx,
        mode=DecisionTreeConverter.MODE_REGRESSION,
    )
class TestDecisionTreeClassifierConverter(TestCase):
    """Checks the PMML generated for a small fitted DecisionTreeClassifier."""

    def setUp(self):
        """Fit a depth-2 classifier on four samples and wrap it in a converter."""
        np.random.seed(1)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        labels = [0, 1, 1, 1]
        self.est = DecisionTreeClassifier(max_depth=2)
        self.est.fit(samples, labels)

        # Each schema gets its own feature instances, even where the
        # definitions are identical.
        input_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        model_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        self.ctx = TransformationContext({
            Schema.INPUT: input_features,
            Schema.MODEL: model_features,
            Schema.DERIVED: [],
            Schema.OUTPUT: [IntegerNumericFeature('output')],
        })
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION,
        )

    def test_transform(self):
        """The first TreeModel has a mining schema with 2 fields and a True_ root node."""
        document = self.converter.pmml()
        tree_model = document.TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        assert len(mining_schema.MiningField) == 2, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'

    def test_transform_with_derived_field(self):
        """Smoke test: a context with a MapValues-derived feature converts and serializes to XML."""
        self.est = DecisionTreeClassifier(max_depth=2)
        samples = [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]]
        labels = [0, 1, 1, 1]
        self.est.fit(samples, labels)

        # MapValues transformation that looks up "output" from the (x1, x2) pair.
        mapping = pmml.MapValues(dataType="double", outputColumn="output")
        for column_name in ("x1", "x2"):
            mapping.append(pmml.FieldColumnPair(column=column_name, field=column_name))
        table = pmml.InlineTable()
        for x1, x2, output in ((0, 'zero', 0), (0, 'one', 0), (1, 'zero', 0), (1, 'one', 1)):
            table.append(pmml_row(x1=x1, x2=x2, output=output))
        mapping.append(table)

        input_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        derived_features = [
            DerivedFeature(feature=RealNumericFeature(name='x3'), transformation=mapping),
        ]
        model_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
            RealNumericFeature(name='x3'),
        ]
        output_features = [IntegerCategoricalFeature('output', ['neg', 'pos'])]
        self.ctx = TransformationContext({
            Schema.INPUT: input_features,
            Schema.DERIVED: derived_features,
            Schema.MODEL: model_features,
            Schema.OUTPUT: output_features,
        })
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION,
        )
        # No assertions: the test passes if conversion and serialization do not raise.
        self.converter.pmml().toxml()
def setUp(self):
    """Create a depth-2 DecisionTreeClassifier and a classification-mode converter for it."""
    self.model = DecisionTreeClassifier(max_depth=2)
    # init_data() is defined outside this snippet; the converter below reads
    # self.ctx, which is presumably populated there -- TODO confirm.
    self.init_data()
    classification_mode = ModelMode.CLASSIFICATION
    self.converter = DecisionTreeConverter(
        estimator=self.model,
        context=self.ctx,
        mode=classification_mode,
    )
class TestDecisionTreeClassifierConverter(TestCase):
    """Checks the PMML generated for a small fitted DecisionTreeClassifier."""

    def setUp(self):
        """Fit a depth-2 classifier on four samples and wrap it in a converter."""
        np.random.seed(1)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        labels = [0, 1, 1, 1]
        self.est = DecisionTreeClassifier(max_depth=2)
        self.est.fit(samples, labels)

        # Input and model schemas list the same two features, but each schema
        # gets its own freshly constructed feature objects.
        input_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        model_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        self.ctx = TransformationContext(
            input=input_features,
            model=model_features,
            derived=[],
            output=[IntegerCategoricalFeature('output', ['neg', 'pos'])],
        )
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=DecisionTreeConverter.MODE_CLASSIFICATION,
        )

    def test_transform(self):
        """The first TreeModel has a mining schema with 3 fields and a True_ root node."""
        document = self.converter.pmml()
        tree_model = document.TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        assert len(mining_schema.MiningField) == 3, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'

    def test_transform_with_derived_field(self):
        """Smoke test: a context with a MapValues-derived feature converts and serializes to XML."""
        self.est = DecisionTreeClassifier(max_depth=2)
        samples = [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]]
        labels = [0, 1, 1, 1]
        self.est.fit(samples, labels)

        # MapValues transformation that looks up "output" from the (x1, x2) pair.
        mapping = pmml.MapValues(dataType="double", outputColumn="output")
        for column_name in ("x1", "x2"):
            mapping.append(pmml.FieldColumnPair(column=column_name, field=column_name))
        table = pmml.InlineTable()
        for x1, x2, output in ((0, 'zero', 0), (0, 'one', 0), (1, 'zero', 0), (1, 'one', 1)):
            table.append(pmml_row(x1=x1, x2=x2, output=output))
        mapping.append(table)

        input_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
        ]
        derived_features = [
            DerivedFeature(
                feature=RealNumericFeature(name='x3'),
                transformation=mapping,
            ),
        ]
        model_features = [
            IntegerNumericFeature('x1'),
            StringCategoricalFeature('x2', ['zero', 'one']),
            RealNumericFeature(name='x3'),
        ]
        self.ctx = TransformationContext(
            input=input_features,
            derived=derived_features,
            model=model_features,
            output=[IntegerCategoricalFeature('output', ['neg', 'pos'])],
        )
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=DecisionTreeConverter.MODE_CLASSIFICATION,
        )
        # No assertions: the test passes if conversion and serialization do not raise.
        self.converter.pmml().toxml()
class TestDecisionTreeClassifierConverter(TestCase):
    """Checks the PMML generated for a small fitted DecisionTreeClassifier."""

    def setUp(self):
        """Fit a depth-2 classifier on four samples and wrap it in a converter."""
        np.random.seed(1)
        self.est = DecisionTreeClassifier(max_depth=2)
        self.est.fit([
            [0, 0],
            [0, 1],
            [1, 0],
            [1, 1],
        ], [0, 1, 1, 1])
        self.ctx = TransformationContext({
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.DERIVED: [],
            Schema.OUTPUT: [
                IntegerNumericFeature('output')
            ]
        })
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION
        )

    def test_transform(self):
        """The first TreeModel has a mining schema with 2 fields and a True_ root node."""
        p = self.converter.pmml()
        tm = p.TreeModel[0]
        assert tm.MiningSchema is not None, 'Missing mining schema'
        assert len(tm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields'
        assert tm.Node is not None, 'Missing root node'
        assert tm.Node.recordCount == 4
        assert tm.Node.True_ is not None, 'Root condition should always be True'

    def test_transform_with_derived_field(self):
        """Smoke test: a context with a derived feature converts and serializes to XML.

        The derived feature ``x3`` is described twice: as a PMML ``MapValues``
        transformation over ``(x1, x2)`` and as a vectorized Python function
        built from the same rows. There are no assertions; the test passes if
        ``pmml().toxml()`` does not raise.
        """
        self.est = DecisionTreeClassifier(max_depth=2)
        self.est.fit([
            [0, 0, 0],
            [0, 1, 0],
            [1, 0, 0],
            [1, 1, 1],
        ], [0, 1, 1, 1])

        mapping = pmml.MapValues(dataType="double", outputColumn="output")
        mapping.append(pmml.FieldColumnPair(column="x1", field="x1"))
        mapping.append(pmml.FieldColumnPair(column="x2", field="x2"))
        it = pmml.InlineTable()
        mapping_rows = [
            dict(x1=0, x2='zero', output=0),
            dict(x1=0, x2='one', output=0),
            dict(x1=1, x2='zero', output=0),
            dict(x1=1, x2='one', output=1),
        ]
        mapping_df = pd.DataFrame(mapping_rows)
        for _, line in mapping_df.iterrows():
            it.append(pmml_row(**dict(line)))
        mapping.append(it)

        # Python-side counterpart of the table above. The previous version
        # called mapping_df.set_index(...) without keeping the result
        # (set_index returns a new frame by default) and then indexed with the
        # removed ``.ix`` accessor, so the lookup could never resolve an
        # (x1, x2) pair. A plain dict keyed by (x1, x2) gives the intended
        # mapping without relying on pandas indexing semantics.
        lookup = dict(((row['x1'], row['x2']), row['output']) for row in mapping_rows)
        mapping_f = np.vectorize(lambda x1, x2: lookup[(x1, x2)])

        self.ctx = TransformationContext({
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.DERIVED: [
                DerivedFeature(
                    feature=RealNumericFeature(name='x3'),
                    transformation=mapping,
                    function=mapping_f
                )
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
                RealNumericFeature(name='x3')
            ],
            Schema.OUTPUT: [
                IntegerCategoricalFeature('output', ['neg', 'pos'])
            ]
        })
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION
        )
        self.converter.pmml().toxml()