Example #1
0
    def _test_evaluation(self, allow_slow):
        """
        Test that the converted Core ML NuSVR model makes the same
        predictions as the scikit-learn model it came from.

        Parameters
        ----------
        allow_slow : bool
            When False, break out after the first parameter combination so
            the suite stays fast; when True, exercise every combination.
        """

        # Generate some smallish (some kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=input_names)

        # Parameters to test
        kernel_parameters = [
            {},
            {'kernel': 'rbf', 'gamma': 1.2},
            {'kernel': 'linear'},
            {'kernel': 'poly'},
            {'kernel': 'poly', 'degree': 2},
            {'kernel': 'poly', 'gamma': 0.75},
            {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
            {'kernel': 'sigmoid'},
            {'kernel': 'sigmoid', 'gamma': 1.3},
            {'kernel': 'sigmoid', 'coef0': 0.8},
            {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5},
        ]
        non_kernel_parameters = [
            {},
            {'C': 1},
            {'C': 1.5, 'shrinking': True},
            {'C': 0.5, 'shrinking': False, 'nu': 0.9},
        ]

        # Test every combination of kernel and non-kernel parameters.
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)

                cur_model = NuSVR(**cur_params)
                cur_model.fit(x, y)
                df['prediction'] = cur_model.predict(x)

                spec = scikit_converter.convert(cur_model, input_names, 'target')

                # Prediction parity can only be evaluated on macOS 10.13+.
                if macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    # assertAlmostEquals is a deprecated alias of assertAlmostEqual.
                    self.assertAlmostEqual(metrics['max_error'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
    def _train_convert_evaluate_assert(self, **scikit_params):
        """Train a GradientBoostingRegressor with the given parameters,
        convert it to Core ML, and (on macOS 10.13+) check its metrics."""
        model = GradientBoostingRegressor(random_state=1, **scikit_params)
        model.fit(self.X, self.target)

        # Convert the trained model into a Core ML spec.
        spec = skl_converter.convert(model, self.feature_names,
                                     self.output_name)

        # Prediction parity can only be evaluated on macOS 10.13 or newer.
        if _is_macos() and _macos_version() >= (10, 13):
            frame = pd.DataFrame(self.X, columns=self.feature_names)
            frame["prediction"] = model.predict(self.X)

            metrics = evaluate_regressor(spec, frame, "target", verbose=False)
            self._check_metrics(metrics, scikit_params)
    def test_conversion_with_sparse_X(self):
        """Tests conversion of a model that's fitted with sparse data."""
        n_samples, n_dims = 100, 64
        # KNeighborsClassifier only supports the CSR sparse format.
        X_sparse = sparse.rand(n_samples, n_dims, format='csr')
        # The labels themselves don't matter - just use 100 of the Iris ones.
        labels = self.iris_y[:n_samples]

        sklearn_model = KNeighborsClassifier(algorithm='brute')
        sklearn_model.fit(X_sparse, labels)

        # Conversion of a sparse-fitted model should still yield a spec.
        spec = sklearn.convert(sklearn_model).get_spec()
        self.assertIsNotNone(spec)
 def _train_convert_evaluate(self, **scikit_params):
     """
     Train a scikit-learn gradient-boosting classifier, convert it to
     Core ML, and return the classifier evaluation metrics.
     """
     model = GradientBoostingClassifier(random_state=1, **scikit_params)
     model.fit(self.X, self.target)

     # Convert the trained model into a Core ML spec.
     spec = skl_converter.convert(model, self.feature_names, self.output_name)

     # Collect scikit-learn's predictions alongside the input features.
     frame = pd.DataFrame(self.X, columns=self.feature_names)
     frame['prediction'] = model.predict(self.X)

     # Compare the Core ML model against the scikit-learn predictions.
     return evaluate_classifier(spec, frame)
Example #5
0
    def test_random(self):
        """Check Normalizer conversion on random data for each norm type."""
        X = _np.random.random(size=(50, 3))
        names = ["a", "b", "c"]

        for norm_type in ('l1', 'l2', 'max'):
            model = Normalizer(norm=norm_type)
            expected = model.fit_transform(X)

            spec = converter.convert(model, names, 'out')

            # The converted transformer must reproduce scikit-learn's output.
            inputs = [dict(zip(names, row)) for row in X]
            outputs = [{"out": row} for row in expected]
            evaluate_transformer(spec, inputs, outputs)
    def test_linear_regression_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        # Exercise both settings of the normalize flag.
        for normalize_value in (True, False):
            cur_model = LinearRegression(normalize=normalize_value)
            cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            # Prediction parity can only be evaluated on macOS 10.13+.
            if macos_version() >= (10, 13):
                df['prediction'] = cur_model.predict(self.scikit_data.data)

                metrics = evaluate_regressor(spec, df)
                # assertAlmostEquals is a deprecated alias of assertAlmostEqual.
                self.assertAlmostEqual(metrics['max_error'], 0)
    def test_random(self):
        """Check Normalizer conversion on random data for each norm type.

        NOTE: the original definition omitted ``self``; the unittest runner
        passes the test instance when invoking the method, so the missing
        parameter raised a TypeError at run time.  Fixed here.
        """
        # Generate some random data_imputeValue.multiArrayValue[i]
        X = _np.random.random(size=(50, 3))

        for param in ("l1", "l2", "max"):
            cur_model = Normalizer(norm=param)

            output = cur_model.fit_transform(X)

            spec = converter.convert(cur_model, ["a", "b", "c"], "out")

            evaluate_transformer(
                spec,
                [dict(zip(["a", "b", "c"], row)) for row in X],
                [{
                    "out": row
                } for row in output],
            )
    def test_conversion_brute_algorithm(self):
        """Tests conversion of a scikit KNeighborsClassifier using the brute force algorithm."""
        model = KNeighborsClassifier(algorithm='brute', n_neighbors=42)
        model.fit(self.iris_X, self.iris_y)

        spec = sklearn.convert(model, 'single_input', 'single_output').get_spec()
        self.assertIsNotNone(spec)
        self.assertTrue(spec.HasField("kNearestNeighborsClassifier"))

        # Expect k=42, uniform weighting, a linear (brute-force) index and
        # the squared Euclidean distance function.
        knn = spec.kNearestNeighborsClassifier
        self.assertEqual(knn.k, 42)
        self.assertTrue(knn.HasField("uniformWeighting"))
        self.assertEqual(knn.nearestNeighborsIndex.numberOfDimensions,
                         len(self.iris_X[0]))
        self.assertTrue(knn.nearestNeighborsIndex.HasField("linearIndex"))
        self.assertTrue(
            knn.nearestNeighborsIndex.HasField("squaredEuclideanDistance"))

        self.validate_labels(spec, self.iris_y)
        self.validate_float_samples(spec, self.iris_X)
    def _train_convert_evaluate(self, **scikit_params):
        """
        Train a scikit-learn decision-tree regressor, convert it with
        coremltools, and return the regression evaluation metrics.
        """
        model = DecisionTreeRegressor(random_state=1, **scikit_params)
        model.fit(self.X, self.target)

        # Convert the trained model into a Core ML spec.
        spec = skl_converter.convert(model, self.feature_names,
                                     self.output_name)

        # Record scikit-learn's predictions next to the inputs.
        frame = pd.DataFrame(self.X, columns=self.feature_names)
        frame['prediction'] = model.predict(self.X)

        # Compare the Core ML model against the scikit-learn predictions.
        return evaluate_regressor(spec, frame, target='target', verbose=False)
    def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Train a random-forest regressor, convert it to Core ML, and
        (on macOS 10.13+) verify the converted model's metrics.
        """
        model = RandomForestRegressor(random_state=1, **scikit_params)
        model.fit(self.X, self.target)

        # Convert the trained model into a Core ML spec.
        spec = skl_converter.convert(model, self.feature_names, self.output_name)

        # Prediction parity can only be evaluated on macOS 10.13 or newer.
        if _is_macos() and _macos_version() >= (10, 13):
            frame = pd.DataFrame(self.X, columns=self.feature_names)
            frame["prediction"] = model.predict(self.X)

            metrics = evaluate_regressor(spec, frame, verbose=False)
            self._check_metrics(metrics, scikit_params)
Example #11
0
    def test_random(self):
        """Verify StandardScaler conversion on random data."""
        X = _np.random.random(size=(50, 3))
        names = ["a", "b", "c"]

        scaler = StandardScaler()
        expected = scaler.fit_transform(X)

        spec = converter.convert(scaler, names, 'out').get_spec()

        # The evaluation harness only runs on macOS 10.13 or newer.
        if macos_version() >= (10, 13):
            inputs = [dict(zip(names, row)) for row in X]
            outputs = [{"out": row} for row in expected]
            metrics = evaluate_transformer(spec, inputs, outputs)

            assert metrics["num_errors"] == 0
Example #12
0
    def test_pipeline_rename(self):
        """Renaming a model's input feature must not change its predictions."""
        # Convert
        scikit_spec = converter.convert(self.scikit_model).get_spec()
        model = MLModel(scikit_spec)
        sample_data = self.scikit_data.data[0]

        # Rename
        rename_feature(scikit_spec, "input", "renamed_input")
        renamed_model = MLModel(scikit_spec)

        # Check the predictions
        if _is_macos() and _macos_version() >= (10, 13):
            out_dict = model.predict({"input": sample_data})
            out_dict_renamed = renamed_model.predict({"renamed_input": sample_data})
            # assertAlmostEqual does not support list arguments (it raises
            # TypeError on any mismatch); assertEqual is the intended check.
            self.assertEqual(list(out_dict.keys()), list(out_dict_renamed.keys()))
            self.assertEqual(
                list(out_dict.values()), list(out_dict_renamed.values())
            )
Example #13
0
    def test_boston_OHE(self):
        """One-hot encode various Boston columns and validate the conversion."""
        data = load_boston()

        for categorical_features in ([3], [8], [3, 8], [8, 3]):
            model = OneHotEncoder(categorical_features=categorical_features,
                                  sparse=False)
            model.fit(data.data, data.target)

            # Convert the fitted encoder into a Core ML spec.
            spec = sklearn.convert(model, data.feature_names, 'out').get_spec()

            input_data = [dict(zip(data.feature_names, row))
                          for row in data.data]
            output_data = [{"out": row} for row in model.transform(data.data)]

            # Evaluation requires macOS 10.13 or newer.
            if macos_version() >= (10, 13):
                result = evaluate_transformer(spec, input_data, output_data)

                assert result["num_errors"] == 0
Example #14
0
    def test_boston(self):
        """Convert an L2 Normalizer fitted on the Boston data and evaluate it."""
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        scikit_model = Normalizer(norm="l2").fit(scikit_data.data)

        spec = converter.convert(scikit_model, scikit_data.feature_names,
                                 "out")

        names = scikit_data.feature_names
        input_data = [dict(zip(names, row)) for row in scikit_data.data]
        output_data = [{"out": row}
                       for row in scikit_model.transform(scikit_data.data)]

        # Compare the converted model against scikit-learn's output.
        evaluate_transformer(spec, input_data, output_data)
Example #15
0
    def test_conversion_many_columns(self):
        """One-hot encoding of multiple input columns should convert cleanly."""
        scikit_model = OneHotEncoder()
        scikit_model.fit(self.scikit_data_multiple_cols)
        spec = sklearn.convert(scikit_model, ['feature_1', 'feature_2'],
                               'out').get_spec()

        test_data = [{
            'feature_1': row[0],
            'feature_2': row[1]
        } for row in self.scikit_data_multiple_cols]
        scikit_output = [{
            'out': row
        } for row in scikit_model.transform(
            self.scikit_data_multiple_cols).toarray()]
        metrics = evaluate_transformer(spec, test_data, scikit_output)

        self.assertIsNotNone(spec)
        self.assertIsNotNone(spec.description)
        # assertEquals is a deprecated alias of assertEqual.
        self.assertEqual(metrics['num_errors'], 0)
Example #16
0
    def test_boston_OHE_plus_normalizer(self):
        """A one-hot-encoder + scaler pipeline converts and evaluates cleanly."""
        data = load_boston()

        pipeline = Pipeline([
            ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
            ("Scaler", StandardScaler()),
        ])
        pipeline.fit(data.data, data.target)

        # Convert the whole pipeline into a Core ML spec.
        spec = convert(pipeline, data.feature_names, 'out')

        # Evaluation requires macOS 10.13 or newer.
        if macos_version() >= (10, 13):
            input_data = [dict(zip(data.feature_names, row))
                          for row in data.data]
            output_data = [{"out": row} for row in pipeline.transform(data.data)]

            result = evaluate_transformer(spec, input_data, output_data)
            assert result["num_errors"] == 0
Example #17
0
    def test_conversion_one_column_of_several(self):
        """Encode only the first of two columns and validate the conversion."""
        model = OneHotEncoder(categorical_features=[0])
        model.fit(copy(self.scikit_data_multiple_cols))
        spec = sklearn.convert(model, ["feature_1", "feature_2"],
                               "out").get_spec()

        rows = self.scikit_data_multiple_cols
        test_data = [{"feature_1": r[0], "feature_2": r[1]} for r in rows]
        scikit_output = [{"out": r} for r in model.transform(rows).toarray()]
        metrics = evaluate_transformer(spec, test_data, scikit_output)

        # A valid spec with a description and zero prediction errors.
        self.assertIsNotNone(spec)
        self.assertIsNotNone(spec.description)
        self.assertEqual(metrics["num_errors"], 0)
    def test_conversion_kd_tree_algorithm(self):
        """Tests conversion of a scikit KNeighborsClassifier built with the kd-tree algorithm."""
        leaf_size = 23
        n_neighbors = 42
        model = KNeighborsClassifier(algorithm='kd_tree',
                                     leaf_size=leaf_size,
                                     n_neighbors=n_neighbors)
        model.fit(self.iris_X, self.iris_y)

        spec = sklearn.convert(model, 'single_input',
                               'single_output').get_spec()

        self.assertIsNotNone(spec)
        self.assertTrue(spec.HasField("kNearestNeighborsClassifier"))

        # The number-of-neighbors parameter defaults to the trained value and
        # is allowed to range over [1, number of training samples].
        knn = spec.kNearestNeighborsClassifier
        self.assertEqual(knn.numberOfNeighbors.defaultValue, n_neighbors)
        self.assertEqual(knn.numberOfNeighbors.range.minValue, 1)
        self.assertEqual(knn.numberOfNeighbors.range.maxValue,
                         len(self.iris_X))
        self.assertTrue(knn.HasField("uniformWeighting"))

        # The index must be a single kd-tree with the configured leaf size,
        # using the squared Euclidean distance function.
        index = knn.nearestNeighborsIndex
        self.assertEqual(index.numberOfDimensions, len(self.iris_X[0]))
        self.assertTrue(index.HasField("singleKdTreeIndex"))
        self.assertEqual(leaf_size, index.singleKdTreeIndex.leafSize)
        self.assertTrue(index.HasField("squaredEuclideanDistance"))

        self.validate_labels(spec, self.iris_y)
        self.validate_float_samples(spec, self.iris_X)
    def test_random(self):
        """Verify StandardScaler conversion on random data.

        NOTE: the original definition omitted ``self``; the unittest runner
        passes the test instance when invoking the method, so the missing
        parameter raised a TypeError at run time.  Fixed here.
        """
        # Generate some random data
        X = _np.random.random(size=(50, 3))

        cur_model = StandardScaler()

        output = cur_model.fit_transform(X)

        spec = converter.convert(cur_model, ["a", "b", "c"], "out").get_spec()

        metrics = evaluate_transformer(
            spec,
            [dict(zip(["a", "b", "c"], row)) for row in X],
            [{
                "out": row
            } for row in output],
        )

        if metrics["num_errors"] != 0:
            raise AssertionError
Example #20
0
    def test_conversion_one_column_of_several(self):
        """Encode only the first of two columns and validate the conversion."""
        scikit_model = OneHotEncoder(categorical_features=[0])
        scikit_model.fit(copy(self.scikit_data_multiple_cols))
        spec = sklearn.convert(scikit_model, ['feature_1', 'feature_2'],
                               'out').get_spec()

        # Evaluation requires macOS 10.13 or newer.
        if macos_version() >= (10, 13):
            test_data = [{
                'feature_1': row[0],
                'feature_2': row[1]
            } for row in self.scikit_data_multiple_cols]
            scikit_output = [{
                'out': row
            } for row in scikit_model.transform(
                self.scikit_data_multiple_cols).toarray()]
            metrics = evaluate_transformer(spec, test_data, scikit_output)

            self.assertIsNotNone(spec)
            self.assertIsNotNone(spec.description)
            # assertEquals is a deprecated alias of assertEqual.
            self.assertEqual(metrics['num_errors'], 0)
Example #21
0
    @classmethod
    def setUpClass(cls):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        # unittest invokes setUpClass on the class itself, so it must be a
        # classmethod; without the decorator the bare function is called
        # with no arguments and raises a TypeError.
        if not HAS_SKLEARN:
            return

        scikit_data = load_boston()
        feature_names = scikit_data.feature_names

        scikit_model = LinearRegression()
        scikit_model.fit(scikit_data['data'], scikit_data['target'])
        scikit_spec = converter.convert(scikit_model, feature_names,
                                        'target').get_spec()

        # Save the data and the model on the class for the tests to use.
        cls.scikit_data = scikit_data
        cls.scikit_model = scikit_model
        cls.scikit_spec = scikit_spec
Example #22
0
    def test_boston(self):
        """Convert a StandardScaler fitted on the Boston data and evaluate it."""
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        model = StandardScaler().fit(scikit_data.data)

        spec = converter.convert(model, scikit_data.feature_names,
                                 'out').get_spec()

        names = scikit_data.feature_names
        input_data = [dict(zip(names, row)) for row in scikit_data.data]
        output_data = [{"out": row}
                       for row in model.transform(scikit_data.data)]

        # The converted model must reproduce scikit-learn's output exactly.
        metrics = evaluate_transformer(spec, input_data, output_data)

        assert metrics["num_errors"] == 0
    def test_boston_OHE_plus_trees(self):
        """A one-hot-encoder + boosted-trees pipeline converts correctly."""
        data = load_boston()

        pipeline = Pipeline([
            ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
            ("Trees", GradientBoostingRegressor(random_state=1)),
        ])
        pipeline.fit(data.data, data.target)

        # Convert the whole pipeline into a Core ML spec.
        spec = convert(pipeline, data.feature_names, 'target')

        # Record the pipeline's predictions next to the inputs.
        frame = pd.DataFrame(data.data, columns=data.feature_names)
        frame['prediction'] = pipeline.predict(data.data)

        # The converted regressor must match to within a tiny tolerance.
        result = evaluate_regressor(spec, frame, 'target', verbose=False)

        assert result["max_error"] < 0.0001
Example #24
0
    def test_conversion_boston(self):
        """Exercise Imputer conversion over several strategies and sentinels."""
        from sklearn.datasets import load_boston

        scikit_data = load_boston()
        n_rows, n_cols = scikit_data.data.shape

        # Pick a reproducible set of cells to blank out.
        rn.seed(0)
        missing_value_indices = [(rn.randint(n_rows), rn.randint(n_cols))
                                 for _ in range(n_rows)]

        for strategy in ("mean", "median", "most_frequent"):
            for missing_value in (0, 'NaN', -999):
                # Copy the data and punch in the sentinel missing values.
                X = np.array(scikit_data.data).copy()
                for i, j in missing_value_indices:
                    X[i, j] = missing_value

                model = Imputer(missing_values=missing_value,
                                strategy=strategy)
                model = model.fit(X)

                tr_X = model.transform(X.copy())

                spec = converter.convert(model, scikit_data.feature_names,
                                         'out')

                # Evaluation requires macOS 10.13 or newer.
                if macos_version() >= (10, 13):
                    input_data = [dict(zip(scikit_data.feature_names, row))
                                  for row in X]
                    output_data = [{"out": row} for row in tr_X]

                    result = evaluate_transformer(spec, input_data,
                                                  output_data)

                    assert result["num_errors"] == 0
Example #25
0
    def _test_conversion(self, data, trained_dict_vectorizer):
        """Convert a fitted DictVectorizer and check output parity on *data*."""
        X = trained_dict_vectorizer.transform(data)

        model = sklearn.convert(
            trained_dict_vectorizer,
            input_features="features",
            output_feature_names="output",
        )

        # Parity can only be evaluated on macOS 10.13 or newer.
        if _is_macos() and _macos_version() >= (10, 13):
            inputs = [{"features": row} for row in data]
            expected = [{"output": row} for row in X]
            result = evaluate_transformer(model, inputs, expected, True)
            assert result["num_errors"] == 0
    def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
        """
        Fit LinearSVC models over several parameter sets, convert each, and
        (on macOS 10.13+) check the converted classifier makes no errors.
        """
        ARGS = [
            {},
            {
                "C": 0.75,
                "loss": "hinge"
            },
            {
                "penalty": "l1",
                "dual": False
            },
            {
                "tol": 0.001,
                "fit_intercept": False
            },
            {
                "intercept_scaling": 1.5
            },
        ]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ["x1", "x2"]
        df = pd.DataFrame(x, columns=column_names)

        for cur_args in ARGS:
            print(class_labels, cur_args)
            cur_model = LinearSVC(**cur_args)
            cur_model.fit(x, y)

            spec = convert(cur_model,
                           input_features=column_names,
                           output_feature_names="target")

            if _is_macos() and _macos_version() >= (10, 13):
                df["prediction"] = cur_model.predict(x)

                # Typo fixed (metics -> metrics); assertEquals is a
                # deprecated alias of assertEqual.
                cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(cur_eval_metrics["num_errors"], 0)
    def test_linear_svr_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        ARGS = [
            {},
            {"C": 0.5, "epsilon": 0.25},
            {"dual": False, "loss": "squared_epsilon_insensitive"},
            {"tol": 0.005},
            {"fit_intercept": False},
            {"intercept_scaling": 1.5},
        ]

        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        # Fit, convert and compare a model for every parameter set.
        for cur_args in ARGS:
            print(cur_args)
            model = LinearSVR(**cur_args)
            model.fit(self.scikit_data["data"], self.scikit_data["target"])
            spec = convert(model, input_names, "target")

            df["prediction"] = model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics["max_error"], 0)
Example #28
0
    def _test_boston_OHE_plus_trees(self, loss='ls'):
        """Convert an OHE + gradient-boosting pipeline and check its error."""
        data = load_boston()

        pipeline = Pipeline([
            ("OHE", OneHotEncoder(categorical_features=[8], sparse=False)),
            ("Trees", GradientBoostingRegressor(random_state=1, loss=loss)),
        ])
        pipeline.fit(data.data, data.target)

        # Convert the whole pipeline into a Core ML spec.
        spec = convert(pipeline, data.feature_names, "target")

        # Prediction parity can only be evaluated on macOS 10.13 or newer.
        if _is_macos() and _macos_version() >= (10, 13):
            frame = pd.DataFrame(data.data, columns=data.feature_names)
            frame["prediction"] = pipeline.predict(data.data)

            result = evaluate_regressor(spec, frame, "target", verbose=False)

            assert result["max_error"] < 0.0001
Example #29
0
    def test_boston_OHE_pipeline(self):
        """One-hot encoders inside a pipeline keep output dimensions correct."""
        data = load_boston()

        for categorical_features in ([3], [8], [3, 8], [8, 3]):
            # Put the encoder in a pipeline so that we can test whether the
            # output dimension handling is correct.
            model = Pipeline([
                ("OHE", OneHotEncoder(categorical_features=categorical_features)),
                ("Normalizer", Normalizer()),
            ])
            model.fit(data.data.copy(), data.target)

            # Convert the pipeline into a Core ML spec.
            spec = sklearn.convert(model, data.feature_names, 'out').get_spec()

            # Evaluation requires macOS 10.13 or newer.
            if macos_version() >= (10, 13):
                input_data = [dict(zip(data.feature_names, row))
                              for row in data.data]
                output_data = [{"out": row}
                               for row in model.transform(data.data.copy())]

                result = evaluate_transformer(spec, input_data, output_data)

                assert result["num_errors"] == 0
Example #30
0
    def test_conversion_distance_function_good(self):
        """Tests conversion of a scikit KNeighborsClassifier with a valid distance metric."""
        scikit_model = KNeighborsClassifier(algorithm="brute",
                                            metric="euclidean")
        scikit_model.fit(self.iris_X, self.iris_y)
        coreml_model = sklearn.convert(scikit_model, "single_input",
                                       "single_output")
        coreml_spec = coreml_model.get_spec()
        self.assertIsNotNone(coreml_spec)
        self.assertTrue(
            coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.
            HasField("squaredEuclideanDistance"))

        # Minkowski metric with p=2 is equivalent to the squared Euclidean distance
        scikit_model = KNeighborsClassifier(algorithm="brute",
                                            metric="minkowski",
                                            p=2)
        scikit_model.fit(self.iris_X, self.iris_y)
        # BUG FIX: the original re-read the spec of the first (euclidean)
        # model here; the minkowski model must be converted before checking.
        coreml_model = sklearn.convert(scikit_model, "single_input",
                                       "single_output")
        coreml_spec = coreml_model.get_spec()
        self.assertIsNotNone(coreml_spec)
        self.assertTrue(
            coreml_spec.kNearestNeighborsClassifier.nearestNeighborsIndex.
            HasField("squaredEuclideanDistance"))