def setUp(self):
     """
     Prepare a default regression tree, the fixture data and the converter.

     ``init_data`` is defined outside this snippet; it presumably populates
     ``self.ctx`` (and may fit ``self.model``) -- confirm against the base class.
     """
     self.model = DecisionTreeRegressor()
     self.init_data()
     converter_kwargs = {
         'estimator': self.model,
         'context': self.ctx,
         'mode': DecisionTreeConverter.MODE_REGRESSION,
     }
     self.converter = DecisionTreeConverter(**converter_kwargs)
class TestDecisionTreeRegressorConverter(TestCase):
    """Checks the PMML produced for a fitted ``DecisionTreeRegressor``."""

    def setUp(self):
        """Fit a depth-2 regression tree on four samples and build its converter."""
        np.random.seed(1)

        self.est = DecisionTreeRegressor(max_depth=2)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        targets = [0, 1, 1, 1]
        self.est.fit(samples, targets)

        def feature_columns():
            # Build fresh feature objects on every call so that the input
            # and model schemas never share instances.
            return [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ]

        self.ctx = TransformationContext(
            input=feature_columns(),
            model=feature_columns(),
            derived=[],
            output=[IntegerNumericFeature('output')],
        )
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=DecisionTreeConverter.MODE_REGRESSION,
        )

    def test_transform(self):
        """The tree model carries a three-field mining schema and a True-predicated root."""
        tree_model = self.converter.pmml().TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        field_count = len(mining_schema.MiningField)
        assert field_count == 3, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'
 def setUp(self):
      """Fit a depth-2 regression tree on four samples and build the converter under test."""
      np.random.seed(1)

      regressor = DecisionTreeRegressor(max_depth=2)
      self.est = regressor
      regressor.fit([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 1])

      # Schemas are filled in the same key order as a literal would use.
      schemas = {}
      schemas[Schema.INPUT] = [
          IntegerNumericFeature('x1'),
          StringCategoricalFeature('x2', ['zero', 'one']),
      ]
      schemas[Schema.MODEL] = [
          IntegerNumericFeature('x1'),
          StringCategoricalFeature('x2', ['zero', 'one']),
      ]
      schemas[Schema.DERIVED] = []
      schemas[Schema.OUTPUT] = [IntegerNumericFeature('output')]
      self.ctx = TransformationContext(schemas)

      self.converter = DecisionTreeConverter(
          estimator=self.est, context=self.ctx, mode=ModelMode.REGRESSION)
# Example #4
0
    def segmentation(self):
        """
        Assemble the segmentation (sequence of estimators) for the ensemble.

        The first segment has weight 1 and wraps the model built by the converter
        found for ``self.estimator.init_``. Every following segment wraps one tree
        taken from the first column of ``self.estimator.estimators_`` and is
        weighted by ``self.estimator.learning_rate``.

        :return: Segmentation element
        """
        # PMML offers no weighted sum, so a weighted average is used here and the
        # result is expected to be multiplied by the total weight in the output
        # transformation.
        ensemble = pmml.Segmentation(multipleModelMethod="weightedAverage")

        # The nested models are regressions, so their context uses a continuous
        # numeric output feature instead of the categorical one.
        # NOTE(review): if copy() is a shallow copy, `schemas` is shared with
        # self.context and the assignment below also changes the original
        # context -- confirm this is intended.
        nested_context = copy(self.context)
        numeric_output = RealNumericFeature(
            name=self.context.schemas[Schema.OUTPUT][0].name,
            namespace=Schema.NUMERIC.value)
        nested_context.schemas[Schema.OUTPUT] = [numeric_output]

        # Segment for the initial estimator.
        base_segment = pmml.Segment(weight=1)
        base_segment.append(pmml.True_())
        base_converter = find_converter(self.estimator.init_)(
            self.estimator.init_, nested_context)
        base_segment.append(base_converter.model())
        ensemble.append(base_segment)

        # One segment per boosting stage.
        for stage_tree in self.estimator.estimators_[:, 0]:
            stage_segment = pmml.Segment(weight=self.estimator.learning_rate)
            stage_segment.append(pmml.True_())
            stage_converter = DecisionTreeConverter(
                stage_tree, nested_context, ModelMode.REGRESSION)
            stage_segment.append(stage_converter._model())
            ensemble.append(stage_segment)

        return ensemble
 def setUp(self):
      """
      Prepare a default classification tree, the fixture data and the converter.

      ``init_data`` is defined outside this snippet; it presumably populates
      ``self.ctx`` (and may fit ``self.model``) -- confirm against the base class.
      """
      self.model = DecisionTreeClassifier()
      self.init_data()
      converter_kwargs = {
          'estimator': self.model,
          'context': self.ctx,
          'mode': DecisionTreeConverter.MODE_CLASSIFICATION,
      }
      self.converter = DecisionTreeConverter(**converter_kwargs)
# Example #6
0
    def segmentation(self):
        """
        Assemble the segmentation (sequence of estimators) for the ensemble.

        The first segment has weight 1 and wraps the model built by the converter
        found for ``self.estimator.init_``. Every following segment wraps one tree
        taken from the first column of ``self.estimator.estimators_`` and is
        weighted by ``self.estimator.learning_rate``.

        :return: Segmentation element
        """
        # PMML offers no weighted sum, so a weighted average is used here and the
        # result is expected to be multiplied by the total weight in the output
        # transformation.
        ensemble = pmml.Segmentation(multipleModelMethod="weightedAverage")

        # Segment for the initial estimator.
        base_segment = pmml.Segment(weight=1)
        base_segment.append(pmml.True_())
        base_converter = find_converter(self.estimator.init_)(
            self.estimator.init_, self.context)
        base_segment.append(base_converter.model())
        ensemble.append(base_segment)

        # One segment per boosting stage.
        for stage_tree in self.estimator.estimators_[:, 0]:
            stage_segment = pmml.Segment(weight=self.estimator.learning_rate)
            stage_segment.append(pmml.True_())
            stage_converter = DecisionTreeConverter(
                stage_tree, self.context, self.MODE_REGRESSION)
            stage_segment.append(stage_converter._model())
            ensemble.append(stage_segment)

        return ensemble
class TestDecisionTreeRegressorConverter(TestCase):
    """Checks the PMML produced for a fitted ``DecisionTreeRegressor``."""

    def setUp(self):
        """Fit a depth-2 regression tree on four samples and build its converter."""
        np.random.seed(1)

        self.est = DecisionTreeRegressor(max_depth=2)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        targets = [0, 1, 1, 1]
        self.est.fit(samples, targets)

        schemas = {
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ],
            Schema.DERIVED: [],
            Schema.OUTPUT: [IntegerNumericFeature('output')],
        }
        self.ctx = TransformationContext(schemas)
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.REGRESSION,
        )

    def test_transform(self):
        """The tree model carries a two-field mining schema and a True-predicated root."""
        tree_model = self.converter.pmml().TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        field_count = len(mining_schema.MiningField)
        assert field_count == 2, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'
 def test_transform_with_derived_field(self):
      """Serialise a classifier whose third model feature is derived via a MapValues table."""
      classifier = DecisionTreeClassifier(max_depth=2)
      self.est = classifier
      classifier.fit(
          [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]],
          [0, 1, 1, 1],
      )

      # MapValues expression: (x1, x2) -> output, backed by an inline table.
      mapping = pmml.MapValues(dataType="double", outputColumn="output")
      for column in ("x1", "x2"):
          mapping.append(pmml.FieldColumnPair(column=column, field=column))
      table = pmml.InlineTable()
      table_rows = (
          (0, 'zero', 0),
          (0, 'one', 0),
          (1, 'zero', 0),
          (1, 'one', 1),
      )
      for x1, x2, output in table_rows:
          table.append(pmml_row(x1=x1, x2=x2, output=output))
      mapping.append(table)

      derived_x3 = DerivedFeature(feature=RealNumericFeature(name='x3'),
                                  transformation=mapping)
      schemas = {
          Schema.INPUT: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ],
          Schema.DERIVED: [derived_x3],
          Schema.MODEL: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
              RealNumericFeature(name='x3'),
          ],
          Schema.OUTPUT: [IntegerCategoricalFeature('output', ['neg', 'pos'])],
      }
      self.ctx = TransformationContext(schemas)
      self.converter = DecisionTreeConverter(
          estimator=self.est, context=self.ctx, mode=ModelMode.CLASSIFICATION)

      # Passes as long as PMML generation and XML serialisation do not raise.
      self.converter.pmml().toxml()
 def test_transform_with_derived_field(self):
      """
      Serialise a classifier whose third model feature is derived via a MapValues table.

      The derived feature carries both the PMML transformation and an
      equivalent Python function (``mapping_f``) that looks the value up in
      the same table.
      """
      self.est = DecisionTreeClassifier(max_depth=2)
      self.est.fit([
          [0, 0, 0],
          [0, 1, 0],
          [1, 0, 0],
          [1, 1, 1],
      ], [0, 1, 1, 1])
      mapping = pmml.MapValues(dataType="double", outputColumn="output")
      mapping.append(pmml.FieldColumnPair(column="x1", field="x1"))
      mapping.append(pmml.FieldColumnPair(column="x2", field="x2"))
      it = pmml.InlineTable()
      mapping_df = pd.DataFrame([
          dict(x1=0, x2='zero', output=0),
          dict(x1=0, x2='one', output=0),
          dict(x1=1, x2='zero', output=0),
          dict(x1=1, x2='one', output=1),
      ])
      # The inline table is filled from the un-indexed frame so that x1 and
      # x2 are still regular columns of every row.
      for _, line in mapping_df.iterrows():
          it.append(pmml_row(**dict(line)))
      mapping.append(it)
      # set_index returns a new frame, so its result must be kept; the
      # lookup uses .loc because DataFrame.ix no longer exists in pandas.
      indexed_mapping = mapping_df.set_index(keys=['x1', 'x2'])
      mapping_f = np.vectorize(
          lambda x1, x2: indexed_mapping.loc[(x1, x2), 'output'])
      self.ctx = TransformationContext({
          Schema.INPUT: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one'])
          ],
          Schema.DERIVED: [
              DerivedFeature(
                  feature=RealNumericFeature(name='x3'),
                  transformation=mapping,
                  function=mapping_f
              )
          ],
          Schema.MODEL: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
              RealNumericFeature(name='x3')
          ],
          Schema.OUTPUT: [
              IntegerCategoricalFeature('output', ['neg', 'pos'])
          ]
      })
      self.converter = DecisionTreeConverter(
          estimator=self.est,
          context=self.ctx,
          mode=ModelMode.CLASSIFICATION
      )
      # Passes as long as PMML generation and XML serialisation do not raise.
      self.converter.pmml().toxml()
 def setUp(self):
      """Fit a depth-2 regression tree on four samples and build the converter under test."""
      np.random.seed(1)

      self.est = DecisionTreeRegressor(max_depth=2)
      samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
      targets = [0, 1, 1, 1]
      self.est.fit(samples, targets)

      def feature_columns():
          # Build fresh feature objects on every call so that the input and
          # model schemas never share instances.
          return [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ]

      self.ctx = TransformationContext(
          input=feature_columns(),
          model=feature_columns(),
          derived=[],
          output=[IntegerNumericFeature('output')],
      )
      self.converter = DecisionTreeConverter(
          estimator=self.est,
          context=self.ctx,
          mode=DecisionTreeConverter.MODE_REGRESSION,
      )
 def setUp(self):
      """Fit a depth-2 regression tree on four samples and build the converter under test."""
      np.random.seed(1)

      regressor = DecisionTreeRegressor(max_depth=2)
      self.est = regressor
      regressor.fit([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 1])

      schemas = {
          Schema.INPUT: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ],
          Schema.MODEL: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ],
          Schema.DERIVED: [],
          Schema.OUTPUT: [IntegerNumericFeature('output')],
      }
      self.ctx = TransformationContext(schemas)
      self.converter = DecisionTreeConverter(
          estimator=self.est,
          context=self.ctx,
          mode=ModelMode.REGRESSION,
      )
# Example #12
0
    def segmentation(self):
        """
        Assemble a majority-vote segmentation with one segment per estimator.

        Each entry of ``self.estimator.estimators_`` is converted with a
        classification-mode ``DecisionTreeConverter`` and placed in a segment
        whose id is its position in the list and whose predicate is True.

        :return: Segmentation element
        """
        voting = pmml.Segmentation(multipleModelMethod="majorityVote")

        for position, tree in enumerate(self.estimator.estimators_):
            segment = pmml.Segment(id=position)
            segment.append(pmml.True_())
            tree_converter = DecisionTreeConverter(
                tree, self.context, self.MODE_CLASSIFICATION)
            segment.append(tree_converter._model())
            voting.append(segment)

        return voting
 def test_transform_with_derived_field(self):
      """Serialise a classifier whose third model feature is derived via a MapValues table."""
      classifier = DecisionTreeClassifier(max_depth=2)
      self.est = classifier
      classifier.fit(
          [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]],
          [0, 1, 1, 1],
      )

      # MapValues expression: (x1, x2) -> output, backed by an inline table.
      mapping = pmml.MapValues(dataType="double", outputColumn="output")
      for column in ("x1", "x2"):
          mapping.append(pmml.FieldColumnPair(column=column, field=column))
      table = pmml.InlineTable()
      table_rows = (
          (0, 'zero', 0),
          (0, 'one', 0),
          (1, 'zero', 0),
          (1, 'one', 1),
      )
      for x1, x2, output in table_rows:
          table.append(pmml_row(x1=x1, x2=x2, output=output))
      mapping.append(table)

      derived_x3 = DerivedFeature(
          feature=RealNumericFeature(name='x3'),
          transformation=mapping,
      )
      schemas = {
          Schema.INPUT: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ],
          Schema.DERIVED: [derived_x3],
          Schema.MODEL: [
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
              RealNumericFeature(name='x3'),
          ],
          Schema.OUTPUT: [IntegerCategoricalFeature('output', ['neg', 'pos'])],
      }
      self.ctx = TransformationContext(schemas)
      self.converter = DecisionTreeConverter(
          estimator=self.est, context=self.ctx, mode=ModelMode.CLASSIFICATION)

      # Passes as long as PMML generation and XML serialisation do not raise.
      self.converter.pmml().toxml()
 def setUp(self):
      """Fit a depth-2 regression tree on four samples and build the converter under test."""
      np.random.seed(1)

      regressor = DecisionTreeRegressor(max_depth=2)
      self.est = regressor
      regressor.fit([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 1, 1])

      # Keyword arguments are collected in the same order as a direct call
      # would evaluate them.
      context_kwargs = dict(
          input=[
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ],
          model=[
              IntegerNumericFeature('x1'),
              StringCategoricalFeature('x2', ['zero', 'one']),
          ],
          derived=[],
          output=[IntegerNumericFeature('output')],
      )
      self.ctx = TransformationContext(**context_kwargs)
      self.converter = DecisionTreeConverter(
          estimator=self.est,
          context=self.ctx,
          mode=DecisionTreeConverter.MODE_REGRESSION,
      )
class TestDecisionTreeClassifierConverter(TestCase):
    """Checks the PMML produced for a fitted ``DecisionTreeClassifier``."""

    def setUp(self):
        """Fit a depth-2 classification tree on four samples and build its converter."""
        np.random.seed(1)

        self.est = DecisionTreeClassifier(max_depth=2)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        labels = [0, 1, 1, 1]
        self.est.fit(samples, labels)

        schemas = {
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ],
            Schema.DERIVED: [],
            Schema.OUTPUT: [IntegerNumericFeature('output')],
        }
        self.ctx = TransformationContext(schemas)
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION,
        )

    def test_transform(self):
        """The tree model carries a two-field mining schema and a True-predicated root."""
        tree_model = self.converter.pmml().TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        field_count = len(mining_schema.MiningField)
        assert field_count == 2, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'

    def test_transform_with_derived_field(self):
        """Serialise a classifier whose third model feature is derived via a MapValues table."""
        classifier = DecisionTreeClassifier(max_depth=2)
        self.est = classifier
        classifier.fit(
            [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]],
            [0, 1, 1, 1],
        )

        # MapValues expression: (x1, x2) -> output, backed by an inline table.
        mapping = pmml.MapValues(dataType="double", outputColumn="output")
        for column in ("x1", "x2"):
            mapping.append(pmml.FieldColumnPair(column=column, field=column))
        table = pmml.InlineTable()
        table_rows = (
            (0, 'zero', 0),
            (0, 'one', 0),
            (1, 'zero', 0),
            (1, 'one', 1),
        )
        for x1, x2, output in table_rows:
            table.append(pmml_row(x1=x1, x2=x2, output=output))
        mapping.append(table)

        derived_x3 = DerivedFeature(feature=RealNumericFeature(name='x3'),
                                    transformation=mapping)
        schemas = {
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ],
            Schema.DERIVED: [derived_x3],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
                RealNumericFeature(name='x3'),
            ],
            Schema.OUTPUT: [IntegerCategoricalFeature('output', ['neg', 'pos'])],
        }
        self.ctx = TransformationContext(schemas)
        self.converter = DecisionTreeConverter(
            estimator=self.est, context=self.ctx, mode=ModelMode.CLASSIFICATION)

        # Passes as long as PMML generation and XML serialisation do not raise.
        self.converter.pmml().toxml()
 def setUp(self):
      """
      Prepare a depth-2 classification tree, the fixture data and the converter.

      ``init_data`` is defined outside this snippet; it presumably populates
      ``self.ctx`` (and may fit ``self.model``) -- confirm against the base class.
      """
      self.model = DecisionTreeClassifier(max_depth=2)
      self.init_data()
      converter_kwargs = {
          'estimator': self.model,
          'context': self.ctx,
          'mode': ModelMode.CLASSIFICATION,
      }
      self.converter = DecisionTreeConverter(**converter_kwargs)
class TestDecisionTreeClassifierConverter(TestCase):
    """Checks the PMML produced for a fitted ``DecisionTreeClassifier``."""

    def setUp(self):
        """Fit a depth-2 classification tree on four samples and build its converter."""
        np.random.seed(1)

        self.est = DecisionTreeClassifier(max_depth=2)
        samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
        labels = [0, 1, 1, 1]
        self.est.fit(samples, labels)

        def feature_columns():
            # Build fresh feature objects on every call so that the input
            # and model schemas never share instances.
            return [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ]

        self.ctx = TransformationContext(
            input=feature_columns(),
            model=feature_columns(),
            derived=[],
            output=[IntegerCategoricalFeature('output', ['neg', 'pos'])],
        )
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=DecisionTreeConverter.MODE_CLASSIFICATION,
        )

    def test_transform(self):
        """The tree model carries a three-field mining schema and a True-predicated root."""
        tree_model = self.converter.pmml().TreeModel[0]

        mining_schema = tree_model.MiningSchema
        assert mining_schema is not None, 'Missing mining schema'
        field_count = len(mining_schema.MiningField)
        assert field_count == 3, 'Wrong number of mining fields'

        root = tree_model.Node
        assert root is not None, 'Missing root node'
        assert root.recordCount == 4
        assert root.True_ is not None, 'Root condition should always be True'

    def test_transform_with_derived_field(self):
        """Serialise a classifier whose third model feature is derived via a MapValues table."""
        classifier = DecisionTreeClassifier(max_depth=2)
        self.est = classifier
        classifier.fit(
            [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 1]],
            [0, 1, 1, 1],
        )

        # MapValues expression: (x1, x2) -> output, backed by an inline table.
        mapping = pmml.MapValues(dataType="double", outputColumn="output")
        for column in ("x1", "x2"):
            mapping.append(pmml.FieldColumnPair(column=column, field=column))
        table = pmml.InlineTable()
        table_rows = (
            (0, 'zero', 0),
            (0, 'one', 0),
            (1, 'zero', 0),
            (1, 'one', 1),
        )
        for x1, x2, output in table_rows:
            table.append(pmml_row(x1=x1, x2=x2, output=output))
        mapping.append(table)

        # Keyword arguments are collected in the same order as a direct call
        # would evaluate them.
        context_kwargs = dict(
            input=[
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
            ],
            derived=[
                DerivedFeature(
                    feature=RealNumericFeature(name='x3'),
                    transformation=mapping,
                ),
            ],
            model=[
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
                RealNumericFeature(name='x3'),
            ],
            output=[IntegerCategoricalFeature('output', ['neg', 'pos'])],
        )
        self.ctx = TransformationContext(**context_kwargs)
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=DecisionTreeConverter.MODE_CLASSIFICATION,
        )

        # Passes as long as PMML generation and XML serialisation do not raise.
        self.converter.pmml().toxml()
class TestDecisionTreeClassifierConverter(TestCase):
    """Checks the PMML produced for a fitted ``DecisionTreeClassifier``."""

    def setUp(self):
        """Fit a depth-2 classification tree on four samples and build its converter."""
        np.random.seed(1)
        self.est = DecisionTreeClassifier(max_depth=2)
        self.est.fit([
            [0, 0],
            [0, 1],
            [1, 0],
            [1, 1],
        ], [0, 1, 1, 1])
        self.ctx = TransformationContext({
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.DERIVED: [],
            Schema.OUTPUT: [
                IntegerNumericFeature('output')
            ]
        })
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION
        )

    def test_transform(self):
        """The tree model carries a two-field mining schema and a True-predicated root."""
        p = self.converter.pmml()
        tm = p.TreeModel[0]
        assert tm.MiningSchema is not None, 'Missing mining schema'
        assert len(tm.MiningSchema.MiningField) == 2, 'Wrong number of mining fields'
        assert tm.Node is not None, 'Missing root node'
        assert tm.Node.recordCount == 4
        assert tm.Node.True_ is not None, 'Root condition should always be True'

    def test_transform_with_derived_field(self):
        """
        Serialise a classifier whose third model feature is derived via a MapValues table.

        The derived feature carries both the PMML transformation and an
        equivalent Python function (``mapping_f``) that looks the value up in
        the same table.
        """
        self.est = DecisionTreeClassifier(max_depth=2)
        self.est.fit([
            [0, 0, 0],
            [0, 1, 0],
            [1, 0, 0],
            [1, 1, 1],
        ], [0, 1, 1, 1])
        mapping = pmml.MapValues(dataType="double", outputColumn="output")
        mapping.append(pmml.FieldColumnPair(column="x1", field="x1"))
        mapping.append(pmml.FieldColumnPair(column="x2", field="x2"))
        it = pmml.InlineTable()
        mapping_df = pd.DataFrame([
            dict(x1=0, x2='zero', output=0),
            dict(x1=0, x2='one', output=0),
            dict(x1=1, x2='zero', output=0),
            dict(x1=1, x2='one', output=1),
        ])
        # The inline table is filled from the un-indexed frame so that x1
        # and x2 are still regular columns of every row.
        for _, line in mapping_df.iterrows():
            it.append(pmml_row(**dict(line)))
        mapping.append(it)
        # set_index returns a new frame, so its result must be kept; the
        # lookup uses .loc because DataFrame.ix no longer exists in pandas.
        indexed_mapping = mapping_df.set_index(keys=['x1', 'x2'])
        mapping_f = np.vectorize(
            lambda x1, x2: indexed_mapping.loc[(x1, x2), 'output'])
        self.ctx = TransformationContext({
            Schema.INPUT: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one'])
            ],
            Schema.DERIVED: [
                DerivedFeature(
                    feature=RealNumericFeature(name='x3'),
                    transformation=mapping,
                    function=mapping_f
                )
            ],
            Schema.MODEL: [
                IntegerNumericFeature('x1'),
                StringCategoricalFeature('x2', ['zero', 'one']),
                RealNumericFeature(name='x3')
            ],
            Schema.OUTPUT: [
                IntegerCategoricalFeature('output', ['neg', 'pos'])
            ]
        })
        self.converter = DecisionTreeConverter(
            estimator=self.est,
            context=self.ctx,
            mode=ModelMode.CLASSIFICATION
        )
        # Passes as long as PMML generation and XML serialisation do not raise.
        self.converter.pmml().toxml()