Example #1
    def test_default_names(self):
        df = pd.DataFrame({'input': self.x})

        # Test with probabilities
        spec = libsvm.convert(self.libsvm_model).get_spec()
        if is_macos() and macos_version() >= (10, 13):
            (_, _, probability_lists) = svm_predict(self.y, self.x,
                                                    self.libsvm_model,
                                                    '-b 1 -q')
            probability_dicts = [
                dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
            ]
            df['classProbability'] = probability_dicts
            metrics = evaluate_classifier_with_probabilities(
                spec, df, verbose=False, probabilities='classProbability')
            self.assertLess(metrics['max_probability_error'], 0.00001)

        # Test model without probabilities
        no_probability_model = svmutil.svm_train(self.prob,
                                                 svmutil.svm_parameter())
        spec = libsvm.convert(no_probability_model).get_spec()
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, u'target')
        if is_macos() and macos_version() >= (10, 13):
            (df['prediction'], _, _) = svm_predict(self.y, self.x,
                                                   no_probability_model, ' -q')
            metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(metrics['num_errors'], 0)
Example #2
    def test_classifier_no_name(self):
        np.random.seed(1988)

        input_dim = 5
        num_hidden = 12
        num_classes = 6
        input_length = 3

        model = Sequential()
        model.add(
            LSTM(num_hidden,
                 input_dim=input_dim,
                 input_length=input_length,
                 return_sequences=False))
        model.add(Dense(num_classes, activation='softmax'))

        model.set_weights(
            [np.random.rand(*w.shape) for w in model.get_weights()])

        input_names = ['input']
        output_names = ['zzzz']
        class_labels = ['a', 'b', 'c', 'd', 'e', 'f']
        predicted_feature_name = 'pf'
        coremlmodel = keras_converter.convert(
            model,
            input_names,
            output_names,
            class_labels=class_labels,
            predicted_feature_name=predicted_feature_name)

        if is_macos() and macos_version() >= (10, 13):
            inputs = np.random.rand(input_dim)
            outputs = coremlmodel.predict({'input': inputs})
            # this checks that the dictionary got the right name and type
            self.assertEqual(type(outputs[output_names[0]]), type({'a': 0.5}))
Example #3
    def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels):
        options = {
            'C': (0.1, 1., 2.),
            'fit_intercept': (True, False),
            'class_weight': ('balanced', None),
            'solver': ('newton-cg', 'lbfgs', 'liblinear', 'sag')
        }

        # Generate a list of all combinations of options and the default parameters
        product = itertools.product(*options.values())
        args = [{}] + [dict(zip(options.keys(), p)) for p in product]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=column_names)

        for cur_args in args:
            print(class_labels, cur_args)
            cur_model = LogisticRegression(**cur_args)
            cur_model.fit(x, y)

            spec = convert(cur_model, input_features=column_names,
                           output_feature_names='target')

            if is_macos() and macos_version() >= (10, 13):
                probability_lists = cur_model.predict_proba(x)
                df['classProbability'] = [dict(zip(cur_model.classes_, cur_vals)) for cur_vals in probability_lists]

                metrics = evaluate_classifier_with_probabilities(spec, df, probabilities='classProbability', verbose=False)
                self.assertEqual(metrics['num_key_mismatch'], 0)
                self.assertLess(metrics['max_probability_error'], 0.00001)
Example #4
    def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
        ARGS = [
            {},
            {'C': .75, 'loss': 'hinge'},
            {'penalty': 'l1', 'dual': False},
            {'tol': 0.001, 'fit_intercept': False},
            {'intercept_scaling': 1.5}
        ]

        x, y = GlmCassifierTest._generate_random_data(class_labels)
        column_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=column_names)
        
        for cur_args in ARGS:
            print(class_labels, cur_args)
            cur_model = LinearSVC(**cur_args)
            cur_model.fit(x, y)

            spec = convert(cur_model, input_features=column_names,
                           output_feature_names='target')

            if is_macos() and macos_version() >= (10, 13):
                df['prediction'] = cur_model.predict(x)

                cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(cur_eval_metrics['num_errors'], 0)
Example #5
    def _test_prob_model(self, param1, param2):
        probability_param = '-b 1'
        df = self.df

        param_str = ' '.join(
            [self.base_param, param1, param2, probability_param])
        param = svm_parameter(param_str)

        model = svm_train(self.prob, param)

        # Get predictions with probabilities as dictionaries
        (df['prediction'], _,
         probability_lists) = svm_predict(self.y, self.x, model,
                                          probability_param + ' -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['probabilities'] = probability_dicts

        spec = libsvm.convert(model, self.column_names, 'target',
                              'probabilities')

        if is_macos() and macos_version() >= (10, 13):
            metrics = evaluate_classifier_with_probabilities(spec,
                                                             df,
                                                             verbose=False)
            self.assertEqual(metrics['num_key_mismatch'], 0)
            self.assertLess(metrics['max_probability_error'], 0.00001)
Example #6
    def _train_convert_evaluate_assert(self, bt_params={}, **params):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        # Train a model
        xgb_model = xgboost.XGBRegressor(**params)
        xgb_model.fit(self.X, self.target)

        # Convert the model
        spec = xgb_converter.convert(xgb_model,
                                     self.feature_names,
                                     self.output_name,
                                     force_32bit_float=False)

        if is_macos() and macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df['prediction'] = xgb_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_regressor(spec,
                                         df,
                                         target='target',
                                         verbose=False)
            self._check_metrics(metrics, bt_params)
Example #7
    def test_int_features_in_pipeline(self):

        import numpy.random as rn
        import pandas as pd
        rn.seed(0)

        x_train_dict = [
            dict((rn.randint(100), 1) for i in range(20)) for j in range(100)
        ]
        y_train = [0, 1] * 50

        from sklearn.pipeline import Pipeline
        from sklearn.feature_extraction import DictVectorizer
        from sklearn.linear_model import LogisticRegression

        pl = Pipeline([("dv", DictVectorizer()), ("lm", LogisticRegression())])
        pl.fit(x_train_dict, y_train)

        import coremltools

        model = coremltools.converters.sklearn.convert(
            pl, input_features="features", output_feature_names="target")

        if is_macos() and macos_version() >= (10, 13):
            x = pd.DataFrame({
                "features": x_train_dict,
                "prediction": pl.predict(x_train_dict)
            })

            cur_eval_metrics = evaluate_classifier(model, x)
            self.assertEqual(cur_eval_metrics['num_errors'], 0)
Example #8
    def _test_evaluation(self, allow_slow):
        """
        Test that the same predictions are made
        """
        from svm import svm_parameter, svm_problem
        from svmutil import svm_train, svm_predict

        # Generate some smallish (poly kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=input_names)
        prob = svm_problem(y, x)

        # Parameters
        base_param = '-s 3'  # model type is epsilon SVR
        non_kernel_parameters = [
            '', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'
        ]
        kernel_parameters = [
            '',
            '-t 2 -g 1.2',  # rbf kernel
            '-t 0',  # linear kernel
            '-t 1',
            '-t 1 -d 2',
            '-t 1 -g 0.75',
            '-t 1 -d 0 -g 0.9 -r 2',  # poly kernel
            '-t 3',
            '-t 3 -g 1.3',
            '-t 3 -r 0.8',
            '-t 3 -r 0.8 -g 0.5'  # sigmoid kernel
        ]

        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                param_str = ' '.join([base_param, param1, param2])
                print(param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)
                (df['prediction'], _, _) = svm_predict(y, x, model)

                spec = libsvm.convert(model,
                                      input_names=input_names,
                                      target_name='target')

                if is_macos() and macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    self.assertAlmostEqual(metrics['max_error'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #9
    def test_internal_layer(self):

        np.random.seed(1988)

        input_dim = 5
        num_channels1 = 10
        num_channels2 = 7
        num_channels3 = 5

        w1 = (np.random.rand(input_dim, num_channels1) - 0.5) / 5.0
        w2 = (np.random.rand(num_channels1, num_channels2) - 0.5) / 5.0
        w3 = (np.random.rand(num_channels2, num_channels3) - 0.5) / 5.0

        b1 = (np.random.rand(num_channels1, ) - 0.5) / 5.0
        b2 = (np.random.rand(num_channels2, ) - 0.5) / 5.0
        b3 = (np.random.rand(num_channels3, ) - 0.5) / 5.0

        model = Sequential()
        model.add(Dense(num_channels1, input_dim=input_dim))
        model.add(Dense(num_channels2, name='middle_layer'))
        model.add(Dense(num_channels3))

        model.set_weights([w1, b1, w2, b2, w3, b3])

        input_names = ['input']
        output_names = ['output']
        coreml1 = keras_converter.convert(model, input_names, output_names)

        # adjust the output parameters of coreml1 to include the intermediate layer
        spec = coreml1.get_spec()
        coremlNewOutputs = spec.description.output.add()
        coremlNewOutputs.name = 'middle_layer_output'
        coremlNewParams = coremlNewOutputs.type.multiArrayType
        coremlNewParams.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value(
            'DOUBLE')
        coremlNewParams.shape.extend([num_channels2])

        coremlfinal = coremltools.models.MLModel(spec)

        # generate a second model that reproduces only the first two layers
        model2 = Sequential()
        model2.add(Dense(num_channels1, input_dim=input_dim))
        model2.add(Dense(num_channels2))
        model2.set_weights([w1, b1, w2, b2])

        coreml2 = keras_converter.convert(model2, input_names, ['output2'])

        if is_macos() and macos_version() >= (10, 13):
            # generate input data
            inputs = np.random.rand(input_dim)

            fullOutputs = coremlfinal.predict({'input': inputs})

            partialOutput = coreml2.predict({'input': inputs})

            for i in range(0, num_channels2):
                self.assertAlmostEqual(fullOutputs['middle_layer_output'][i],
                                       partialOutput['output2'][i], 2)
Example #10
    def run_case(self, layer_type, input_layer, output_layer, delta=1e-2):
        self.maxDiff = None
        extract_tarfile('{}nets/{}.gz'.format(nets_path, layer_type),
                        '{}nets/'.format(nets_path))
        nets = traverse_caffe_nets(layer_type)
        data_files = traverse_data_files(layer_type)
        failed_tests_load = []
        failed_tests_conversion = []
        failed_tests_evaluation = []
        counter = 0
        for net_name_proto in nets:

            counter += 1
            net_data_files = []
            proto_name = "_".join(net_name_proto.split("_")[:2])
            for data_file in data_files:
                if proto_name + '_' in data_file:
                    net_data_files.append(data_file)
            net_name = net_name_proto.split(".")[0]
            conversion_result = conversion_to_mlmodel(net_name, proto_name,
                                                      layer_type, input_layer)
            if is_macos() and macos_version() >= (10, 13):
                if conversion_result is False:
                    failed_tests_conversion.append(net_name)
                    continue
                load_result = load_mlmodel(net_name, layer_type)
                if load_result is False:
                    failed_tests_load.append(net_name)
                if 'input' in net_name:
                    evaluation_result, failed_tests_evaluation = \
                        self.evaluate_model(
                            net_name,
                            layer_type,
                            input_layer,
                            output_layer,
                            net_data_files,
                            failed_tests_evaluation,
                            counter,
                            delta)
        with open('./failed_tests_{}.json'.format(layer_type), mode='w') as f:
            json.dump(
                {
                    'conversion': failed_tests_conversion,
                    'load': failed_tests_load,
                    'evaluation': failed_tests_evaluation
                }, f)

        self.assertEqual(failed_tests_conversion, [])
        self.assertEqual(failed_tests_load, [])
        self.assertEqual(failed_tests_evaluation, [])
        shutil.rmtree('{}nets/{}'.format(nets_path, layer_type))
Example #11
    def _test_model(self, input_dict, ref_output_dict, coreml_model):
        if is_macos() and macos_version() >= (10, 13):
            coreml_out_dict = coreml_model.predict(input_dict, useCPUOnly=True)
            # NOTE: only the first reference output is compared; the loop
            # returns on its first iteration either way.
            for out_ in list(ref_output_dict.keys()):
                ref_out = ref_output_dict[out_]
                coreml_out = coreml_out_dict[out_]
                if self._compare_shapes(ref_out, coreml_out):
                    return True, self._compare_predictions(ref_out, coreml_out)
                else:
                    return False, False
        return True, True
Example #12
    def _test_conversion(self, data, trained_dict_vectorizer):

        X = trained_dict_vectorizer.transform(data)

        m = sklearn.convert(trained_dict_vectorizer,
                            input_features="features",
                            output_feature_names="output")

        if is_macos() and macos_version() >= (10, 13):
            ret = evaluate_transformer(m,
                                       [{"features": row} for row in data],
                                       [{"output": x_r} for x_r in X],
                                       True)
            assert ret["num_errors"] == 0
Example #13
    def _train_convert_evaluate_assert(self, **scikit_params):
        scikit_model = RandomForestClassifier(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names,
                                     self.output_name)

        if is_macos() and macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df['prediction'] = scikit_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df, verbose=False)
            self._check_metrics(metrics, scikit_params)
Example #14
    def test_pipeline_rename(self):

        # Convert
        scikit_spec = converter.convert(self.scikit_model).get_spec()
        model = MLModel(scikit_spec)
        sample_data = self.scikit_data.data[0]

        # Rename
        rename_feature(scikit_spec, 'input', 'renamed_input')
        renamed_model = MLModel(scikit_spec)

        # Check the predictions
        if is_macos() and macos_version() >= (10, 13):
            self.assertEqual(
                model.predict({'input': sample_data}),
                renamed_model.predict({'renamed_input': sample_data}))
Example #15
    def _test_evaluation(self, allow_slow):
        """
        Test that the same predictions are made
        """

        # Generate some smallish (some kernels take too long on anything else) random data
        x, y = [], []
        for _ in range(50):
            cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
            x.append([cur_x1, cur_x2])
            y.append(1 + 2 * cur_x1 + 3 * cur_x2)

        input_names = ['x1', 'x2']
        df = pd.DataFrame(x, columns=input_names)

        # Parameters to test
        kernel_parameters = [
            {},
            {'kernel': 'rbf', 'gamma': 1.2},
            {'kernel': 'linear'},
            {'kernel': 'poly'},
            {'kernel': 'poly', 'degree': 2},
            {'kernel': 'poly', 'gamma': 0.75},
            {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
            {'kernel': 'sigmoid'},
            {'kernel': 'sigmoid', 'gamma': 1.3},
            {'kernel': 'sigmoid', 'coef0': 0.8},
            {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5}
        ]
        non_kernel_parameters = [
            {},
            {'C': 1},
            {'C': 1.5, 'shrinking': True},
            {'C': 0.5, 'shrinking': False, 'nu': 0.9}
        ]

        # Test
        for param1 in non_kernel_parameters:
            for param2 in kernel_parameters:
                cur_params = param1.copy()
                cur_params.update(param2)

                cur_model = NuSVR(**cur_params)
                cur_model.fit(x, y)
                df['prediction'] = cur_model.predict(x)

                spec = scikit_converter.convert(cur_model, input_names, 'target')

                if is_macos() and macos_version() >= (10, 13):
                    metrics = evaluate_regressor(spec, df)
                    self.assertAlmostEqual(metrics['max_error'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #16
    def test_keras_1_image_bias(self):

        # define Keras model and get predictions
        input_shape = (100, 50, 3)
        model = Sequential()
        model.add(Activation('linear', input_shape=input_shape))

        data = np.ones(input_shape)
        keras_input = np.ones(input_shape)
        data[:, :, 0] = 128.0
        data[:, :, 1] = 27.0
        data[:, :, 2] = 200.0
        red_bias = -12.0
        green_bias = -20
        blue_bias = -4
        keras_input[:, :, 0] = data[:, :, 0] + red_bias
        keras_input[:, :, 1] = data[:, :, 1] + green_bias
        keras_input[:, :, 2] = data[:, :, 2] + blue_bias

        keras_preds = model.predict(np.expand_dims(keras_input, axis=0))
        keras_preds = np.transpose(keras_preds, [0, 3, 1, 2]).flatten()

        # convert to Core ML and get predictions
        model_dir = tempfile.mkdtemp()
        model_path = os.path.join(model_dir, 'keras.mlmodel')
        from coremltools.converters import keras as keras_converter
        coreml_model = keras_converter.convert(model,
                                               input_names=['data'],
                                               output_names=['output'],
                                               image_input_names=['data'],
                                               red_bias=red_bias,
                                               green_bias=green_bias,
                                               blue_bias=blue_bias)
        # coreml_model.save(model_path)
        # coreml_model = coremltools.models.MLModel(model_path)

        if is_macos() and macos_version() >= (10, 13):
            coreml_input_dict = dict()
            coreml_input_dict["data"] = PIL.Image.fromarray(data.astype(np.uint8))
            coreml_preds = coreml_model.predict(coreml_input_dict)['output'].flatten()

            self.assertEqual(len(keras_preds), len(coreml_preds))
            max_relative_error = compare_models(keras_preds, coreml_preds)
            self.assertAlmostEqual(max(max_relative_error, .001), .001, delta=1e-6)

        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
Example #17
    def _evaluation_test_helper_no_probability(self, labels, allow_slow):
        # Generate some random data.
        # This unit test should not rely on scikit learn for test data.
        x, y = [], []
        random.seed(42)
        for _ in range(50):
            x.append([
                random.gauss(200, 30),
                random.gauss(-100, 22),
                random.gauss(100, 42)
            ])
            y.append(random.choice(labels))
        # make sure first label is seen first, second is seen second, and so on.
        for i, val in enumerate(labels):
            y[i] = val
        column_names = ['x1', 'x2', 'x3']
        prob = svmutil.svm_problem(y, x)

        df = pd.DataFrame(x, columns=column_names)

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join([self.base_param, param1, param2])
                print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(prob, param)

                # Get predictions
                (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')

                spec = libsvm.convert(model, column_names, 'target')

                if is_macos() and macos_version() >= (10, 13):
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEqual(metrics['num_errors'], 0)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #18
    def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = GradientBoostingClassifier(random_state=1,
                                                  **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names,
                                     self.output_name)

        if is_macos() and macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df['prediction'] = scikit_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df)
            self._check_metrics(metrics)
Example #19
    def test_pipeline_rename(self):

        # Convert
        scikit_spec = converter.convert(self.scikit_model).get_spec()
        model = MLModel(scikit_spec)
        sample_data = self.scikit_data.data[0]

        # Rename
        rename_feature(scikit_spec, 'input', 'renamed_input')
        renamed_model = MLModel(scikit_spec)

        # Check the predictions
        if is_macos() and macos_version() >= (10, 13):
            out_dict = model.predict({'input': sample_data})
            out_dict_renamed = renamed_model.predict(
                {'renamed_input': sample_data})
            self.assertEqual(list(out_dict.keys()),
                             list(out_dict_renamed.keys()))
            self.assertEqual(list(out_dict.values()),
                             list(out_dict_renamed.values()))
Example #20
    def _evaluation_test_helper_with_probability(self, labels, allow_slow):
        import copy
        df = pd.DataFrame(self.x, columns=self.column_names)
        y = copy.copy(self.y)
        for i, val in enumerate(labels):
            y[i] = val
        probability_param = '-b 1'

        for param1 in self.non_kernel_parameters:
            for param2 in self.kernel_parameters:
                param_str = ' '.join(
                    [self.base_param, param1, param2, probability_param])
                # print("PARAMS: ", param_str)
                param = svm_parameter(param_str)

                model = svm_train(self.prob, param)

                # Get predictions with probabilities as dictionaries
                (df['prediction'], _,
                 probability_lists) = svm_predict(y, self.x, model,
                                                  probability_param + ' -q')
                probability_dicts = [
                    dict(zip([1, 2], cur_vals))
                    for cur_vals in probability_lists
                ]
                df['probabilities'] = probability_dicts

                spec = libsvm.convert(model, self.column_names, 'target',
                                      'probabilities')

                if is_macos() and macos_version() >= (10, 13):
                    metrics = evaluate_classifier_with_probabilities(
                        spec, df, verbose=False)
                    self.assertEqual(metrics['num_key_mismatch'], 0)
                    self.assertLess(metrics['max_probability_error'], 0.00001)

                if not allow_slow:
                    break

            if not allow_slow:
                break
Example #21
    def test_boston_OHE_plus_normalizer(self):

        data = load_boston()

        pl = Pipeline([("OHE",
                        OneHotEncoder(categorical_features=[8], sparse=False)),
                       ("Scaler", StandardScaler())])

        pl.fit(data.data, data.target)

        # Convert the model
        spec = convert(pl, data.feature_names, 'out')

        if is_macos() and macos_version() >= (10, 13):
            input_data = [
                dict(zip(data.feature_names, row)) for row in data.data
            ]
            output_data = [{"out": row} for row in pl.transform(data.data)]

            result = evaluate_transformer(spec, input_data, output_data)
            assert result["num_errors"] == 0
Example #22
    def _train_convert_evaluate_assert(self, **xgboost_params):
        """
        Train an XGBoost model, convert it and then evaluate it with CoreML
        """
        xgb_model = xgboost.XGBClassifier(**xgboost_params)
        xgb_model.fit(self.X, self.target)

        # Convert the model
        spec = xgb_converter.convert(xgb_model,
                                     self.feature_names,
                                     self.output_name,
                                     mode="classifier")

        if is_macos() and macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df['prediction'] = xgb_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_classifier(spec, df)
            self._check_metrics(metrics)
Example #23
    def _train_convert_evaluate_assert(self, **scikit_params):
        """
        Train a scikit-learn model, convert it and then evaluate it with CoreML
        """
        scikit_model = DecisionTreeRegressor(random_state=1, **scikit_params)
        scikit_model.fit(self.X, self.target)

        # Convert the model
        spec = skl_converter.convert(scikit_model, self.feature_names,
                                     self.output_name)

        if is_macos() and macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(self.X, columns=self.feature_names)
            df['prediction'] = scikit_model.predict(self.X)

            # Evaluate it
            metrics = evaluate_regressor(spec,
                                         df,
                                         target='target',
                                         verbose=False)
            self._check_metrics(metrics, scikit_params)
Example #24
    def test_boston_OHE_plus_trees(self):

        data = load_boston()

        pl = Pipeline([("OHE",
                        OneHotEncoder(categorical_features=[8], sparse=False)),
                       ("Trees", GradientBoostingRegressor(random_state=1))])

        pl.fit(data.data, data.target)

        # Convert the model
        spec = convert(pl, data.feature_names, 'target')

        if is_macos() and macos_version() >= (10, 13):
            # Get predictions
            df = pd.DataFrame(data.data, columns=data.feature_names)
            df['prediction'] = pl.predict(data.data)

            # Evaluate it
            result = evaluate_regressor(spec, df, 'target', verbose=False)

            assert result["max_error"] < 0.0001
Example #25
    def test_input_names(self):
        data = load_boston()
        df = pd.DataFrame({'input': data['data'].tolist()})

        # Default values
        spec = libsvm.convert(self.libsvm_model)
        if is_macos() and macos_version() >= (10, 13):
            (df['prediction'], _,
             _) = svmutil.svm_predict(data['target'], data['data'].tolist(),
                                      self.libsvm_model)
            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics['max_error'], 0)

        # One extra parameter. This is legal/possible.
        num_inputs = len(data['data'][0])
        spec = libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

        # Not enough input names.
        input_names = ['this', 'is', 'not', 'enough', 'names']
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_names=input_names)
        with self.assertRaises(ValueError):
            libsvm.convert(self.libsvm_model, input_length=num_inputs - 1)
Example #26
def create_model(spec):
    """
    Create MLModel with specified types
    Parameters
    ----------
    spec: Pb spec from 3rd party converted model

    Returns
    -------
    MLModel
    """
    return coremltools.models.MLModel(spec)


@unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                     'Only supported on macOS 10.13+')
class TestIODataTypes(unittest.TestCase):
    """
    This class tests for different I/O feature data types for an .mlmodel
    It will cover the following areas to test for:
    - All features must have a valid type
    - Multiarrays must have a valid dataType. Inputs must specify shape. Shape must have >= 0 elements
    - Images must have a valid colorspace. width & height have to be >= 0
    - Dictionaries must have a valid key type
    """
    @property
    def scikit_data(self):
        return load_boston()

    def _feature_data_type(self, dtype):
Example #27
class MLModelTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):

        spec = Model_pb2.Model()
        spec.specificationVersion = coremltools.SPECIFICATION_VERSION

        features = ['feature_1', 'feature_2']
        output = 'output'
        for f in features:
            input_ = spec.description.input.add()
            input_.name = f
            input_.type.doubleType.MergeFromString(b'')

        output_ = spec.description.output.add()
        output_.name = output
        output_.type.doubleType.MergeFromString(b'')

        lr = spec.glmRegressor
        lr.offset.append(0.1)
        weights = lr.weights.add()
        coefs = [1.0, 2.0]
        for i in coefs:
            weights.value.append(i)

        spec.description.predictedFeatureName = 'output'
        cls.spec = spec

    def test_model_creation(self):
        model = MLModel(self.spec)
        self.assertIsNotNone(model)

        filename = tempfile.mktemp(suffix='.mlmodel')
        save_spec(self.spec, filename)
        model = MLModel(filename)
        self.assertIsNotNone(model)

    def test_model_api(self):
        model = MLModel(self.spec)
        self.assertIsNotNone(model)

        model.author = 'Test author'
        self.assertEqual(model.author, 'Test author')
        self.assertEqual(model.get_spec().description.metadata.author,
                         'Test author')

        model.license = 'Test license'
        self.assertEqual(model.license, 'Test license')
        self.assertEqual(model.get_spec().description.metadata.license,
                         'Test license')

        model.short_description = 'Test model'
        self.assertEqual(model.short_description, 'Test model')
        self.assertEqual(
            model.get_spec().description.metadata.shortDescription,
            'Test model')

        model.input_description['feature_1'] = 'This is feature 1'
        self.assertEqual(model.input_description['feature_1'],
                         'This is feature 1')

        model.output_description['output'] = 'This is output'
        self.assertEqual(model.output_description['output'], 'This is output')

        filename = tempfile.mktemp(suffix='.mlmodel')
        model.save(filename)
        loaded_model = MLModel(filename)

        self.assertEqual(model.author, 'Test author')
        self.assertEqual(model.license, 'Test license')
        # self.assertEqual(model.short_description, 'Test model')
        self.assertEqual(model.input_description['feature_1'],
                         'This is feature 1')
        self.assertEqual(model.output_description['output'], 'This is output')

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_predict_api(self):
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_input(self):
        rename_feature(self.spec,
                       'feature_1',
                       'renamed_feature',
                       rename_inputs=True)
        model = MLModel(self.spec)
        preds = model.predict({'renamed_feature': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)
        # reset the spec for next run
        rename_feature(self.spec,
                       'renamed_feature',
                       'feature_1',
                       rename_inputs=True)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_input_bad(self):
        rename_feature(self.spec, 'blah', 'bad_name', rename_inputs=True)
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_output(self):
        rename_feature(self.spec,
                       'output',
                       'renamed_output',
                       rename_inputs=False,
                       rename_outputs=True)
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['renamed_output'], 3.1)
        rename_feature(self.spec,
                       'renamed_output',
                       'output',
                       rename_inputs=False,
                       rename_outputs=True)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_output_bad(self):
        rename_feature(self.spec,
                       'blah',
                       'bad_name',
                       rename_inputs=False,
                       rename_outputs=True)
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_future_version(self):
        self.spec.specificationVersion = 10000
        filename = tempfile.mktemp(suffix='.mlmodel')
        save_spec(self.spec, filename, auto_set_specification_version=False)
        model = MLModel(filename)
        # this model should exist, but throw an exception when we try to use
        # predict because the engine doesn't support this model version
        self.assertIsNotNone(model)
        with self.assertRaises(Exception):
            try:
                model.predict({})
            except Exception as e:
                assert 'Core ML model specification version' in str(e)
                raise
        self.spec.specificationVersion = 1

    @unittest.skipUnless(is_macos() and macos_version() < (10, 13),
                         'Only supported on macOS 10.13-')
    def test_MLModel_warning(self):
        self.spec.specificationVersion = 3
        import warnings
        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            model = MLModel(self.spec)
            assert len(w) == 1
            assert issubclass(w[-1].category, RuntimeWarning)
            assert "not able to run predict()" in str(w[-1].message)
        self.spec.specificationVersion = 1
        model = MLModel(self.spec)

    def test_convert_nn_spec_to_half_precision(self):
        # simple network with quantization layer
        input_features = [('data', datatypes.Array(3))]
        output_features = [('out', datatypes.Array(3))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        weights = np.random.uniform(-0.5, 0.5, (3, 3))
        builder.add_inner_product(name='inner_product',
                                  W=weights,
                                  b=None,
                                  input_channels=3,
                                  output_channels=3,
                                  has_bias=False,
                                  input_name='data',
                                  output_name='out')
        model = MLModel(builder.spec)
        spec = convert_neural_network_spec_weights_to_fp16(model.get_spec())
        self.assertIsNotNone(spec)

        # simple network without quantization layer
        input_features = [('data', datatypes.Array(3))]
        output_features = [('out', datatypes.Array(3))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        builder.add_lrn(name='lrn',
                        input_name='data',
                        output_name='out',
                        alpha=2,
                        beta=3,
                        local_size=1,
                        k=8)
        model = MLModel(builder.spec)
        spec = convert_neural_network_spec_weights_to_fp16(model.get_spec())
        self.assertIsNotNone(spec)

    @unittest.skip("disabled")
    def test_downgrade_specification_version(self):
        # manually set an invalid specification version
        self.spec.specificationVersion = -1
        model = MLModel(self.spec)
        assert model.get_spec().specificationVersion == 1

        # manually set a high specification version
        self.spec.specificationVersion = 4
        filename = tempfile.mktemp(suffix='.mlmodel')
        save_spec(self.spec, filename, auto_set_specification_version=True)
        model = MLModel(filename)
        assert model.get_spec().specificationVersion == 1

        # simple neural network with only spec 1 layer
        input_features = [('data', datatypes.Array(3))]
        output_features = [('out', datatypes.Array(3))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        builder.add_activation('relu', 'RELU', 'data', 'out')
        # set a high specification version
        builder.spec.specificationVersion = 3
        model = MLModel(builder.spec)
        filename = tempfile.mktemp(suffix='.mlmodel')
        model.save(filename)
        # load the model back
        model = MLModel(filename)
        assert model.get_spec().specificationVersion == 1

        # test save without automatic set specification version
        self.spec.specificationVersion = 3
        filename = tempfile.mktemp(suffix='.mlmodel')
        save_spec(self.spec, filename, auto_set_specification_version=False)
        model = MLModel(filename)
        # the specification version should be original
        assert model.get_spec().specificationVersion == 3

    def test_multiarray_type_convert_to_float(self):
        input_features = [('data', datatypes.Array(2))]
        output_features = [('out', datatypes.Array(2))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        builder.add_ceil('ceil', 'data', 'out')
        spec = builder.spec
        self.assertEqual(
            spec.description.input[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.DOUBLE)
        self.assertEqual(
            spec.description.output[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.DOUBLE)
        convert_double_to_float_multiarray_type(spec)
        self.assertEqual(
            spec.description.input[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.FLOAT32)
        self.assertEqual(
            spec.description.output[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.FLOAT32)
Example #28
    def _test_rnn_layer(self, keras_major_version, limit=None):
        i = 0
        layer_name = str(SimpleRNN).split('.')[3].split("'>")[0]
        numerical_err_models = []
        shape_err_models = []
        params = list(
            itertools.product(self.base_layer_params, self.rnn_layer_params))
        np.random.shuffle(params)
        params = [
            param for param in params
            if valid_params(dict(zip(self.params_dict.keys(), param[0])))
        ]
        for base_params, rnn_params in params[:limit]:
            base_params = dict(zip(self.params_dict.keys(), base_params))
            rnn_params = dict(
                zip(self.simple_rnn_params_dict.keys(), rnn_params))
            input_data = generate_input(base_params['input_dims'][0],
                                        base_params['input_dims'][1],
                                        base_params['input_dims'][2])
            model = Sequential()
            settings = dict(
                activation=base_params['activation'],
                return_sequences=base_params['return_sequences'],
                go_backwards=base_params['go_backwards'],
                unroll=base_params['unroll'],
            )
            if keras_major_version == 2:
                model.add(
                    SimpleRNN(
                        base_params['output_dim'],
                        input_shape=base_params['input_dims'][1:],
                        dropout=rnn_params['dropout']['dropout_U'],
                        recurrent_dropout=rnn_params['dropout']['dropout_W'],
                        kernel_regularizer=rnn_params['regularizer']
                        ['W_regularizer'],
                        recurrent_regularizer=rnn_params['regularizer']
                        ['U_regularizer'],
                        bias_regularizer=rnn_params['regularizer']
                        ['b_regularizer'],
                        **settings))
            else:
                model.add(
                    SimpleRNN(base_params['output_dim'],
                              input_length=base_params['input_dims'][1],
                              input_dim=base_params['input_dims'][2],
                              dropout_U=rnn_params['dropout']['dropout_U'],
                              dropout_W=rnn_params['dropout']['dropout_W'],
                              W_regularizer=rnn_params['regularizer']
                              ['W_regularizer'],
                              U_regularizer=rnn_params['regularizer']
                              ['U_regularizer'],
                              b_regularizer=rnn_params['regularizer']
                              ['b_regularizer'],
                              **settings))
            model_dir = tempfile.mkdtemp()
            keras_model_path = os.path.join(model_dir, 'keras.h5')
            coreml_model_path = os.path.join(model_dir, 'keras.mlmodel')
            model.save_weights(keras_model_path)
            mlkitmodel = _get_mlkit_model_from_path(model, coreml_model_path)
            if is_macos() and macos_version() >= (10, 13):
                keras_preds = model.predict(input_data).flatten()
                input_data = np.transpose(input_data, [1, 0, 2])
                coreml_preds = mlkitmodel.predict(
                    {'data': input_data})['output'].flatten()
                try:
                    self.assertEqual(coreml_preds.shape, keras_preds.shape)
                except AssertionError:
                    print(
                        "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}"
                        .format(base_params, keras_preds.shape,
                                coreml_preds.shape))
                    shape_err_models.append(base_params)
                    shutil.rmtree(model_dir)
                    i += 1
                    continue
                try:
                    for idx in range(0, len(coreml_preds)):
                        relative_error = (coreml_preds[idx] -
                                          keras_preds[idx]) / coreml_preds[idx]
                        self.assertAlmostEqual(relative_error, 0, places=2)
                except AssertionError:
                    print(
                        "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}"
                        .format(base_params, keras_preds, coreml_preds))
                    numerical_err_models.append(base_params)
            shutil.rmtree(model_dir)
            i += 1

        self.assertEqual(shape_err_models, [],
                         msg='Shape error models {}'.format(shape_err_models))
        self.assertEqual(
            numerical_err_models, [],
            msg='Numerical error models {}'.format(numerical_err_models))
Example #29
class NearestNeighborsBuilderTest(unittest.TestCase):
    """
    Unit tests for the nearest neighbors builder class.
    """
    def setUp(self):
        iris_samples = load_iris()
        self.iris_X = iris_samples.data
        self.iris_y = iris_samples.target
        self.training_X = self.iris_X[-30:]
        self.training_y = self.iris_y[-30:]

    def tearDown(self):
        # Do any cleanup here
        pass

    def create_builder(self, default_class_label='default_label'):
        builder = KNearestNeighborsClassifierBuilder(
            input_name='input',
            output_name='output',
            number_of_dimensions=4,
            default_class_label=default_class_label)
        return builder

    def test_builder_output_types(self):
        builder = self.create_builder(default_class_label='default')
        self.assertIsNotNone(builder)
        self.assertTrue(
            builder.spec.kNearestNeighborsClassifier.HasField(
                "stringClassLabels"))

        builder = self.create_builder(default_class_label=12)
        self.assertIsNotNone(builder)
        self.assertTrue(
            builder.spec.kNearestNeighborsClassifier.HasField(
                "int64ClassLabels"))

        with self.assertRaises(TypeError):
            bad_default_label = float(21.32)
            self.create_builder(default_class_label=bad_default_label)

    def test_builder_training_input(self):
        builder = self.create_builder(default_class_label='default')
        self.assertIsNotNone(builder)
        self.assertTrue(
            builder.spec.kNearestNeighborsClassifier.HasField(
                "stringClassLabels"))

        self.assertEqual(builder.spec.description.trainingInput[0].name,
                         'input')
        self.assertEqual(
            builder.spec.description.trainingInput[0].type.WhichOneof('Type'),
            'multiArrayType')
        self.assertEqual(builder.spec.description.trainingInput[1].name,
                         'output')
        self.assertEqual(
            builder.spec.description.trainingInput[1].type.WhichOneof('Type'),
            'stringType')

    def test_make_updatable(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertTrue(builder.spec.isUpdatable)
        builder.is_updatable = False
        self.assertFalse(builder.spec.isUpdatable)
        builder.is_updatable = True
        self.assertTrue(builder.spec.isUpdatable)

    def test_author(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.spec.description.metadata.author, '')
        builder.author = 'John Doe'
        self.assertEqual(builder.author, 'John Doe')
        self.assertEqual(builder.spec.description.metadata.author, 'John Doe')

    def test_description(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.spec.description.metadata.shortDescription,
                         '')
        builder.description = 'This is a description'
        self.assertEqual(builder.description, 'This is a description')
        self.assertEqual(builder.spec.description.metadata.shortDescription,
                         'This is a description')

    def test_weighting_scheme(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        builder.weighting_scheme = 'uniform'
        self.assertEqual(builder.weighting_scheme, 'uniform')

        builder.weighting_scheme = 'inverse_distance'
        self.assertEqual(builder.weighting_scheme, 'inverse_distance')

        builder.weighting_scheme = 'unIfOrM'
        self.assertEqual(builder.weighting_scheme, 'uniform')

        builder.weighting_scheme = 'InVerSE_DISTance'
        self.assertEqual(builder.weighting_scheme, 'inverse_distance')

        with self.assertRaises(TypeError):
            builder.weighting_scheme = 'test'

    def test_index_type(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        builder.set_index_type('kd_tree')
        self.assertEqual(builder.index_type, 'kd_tree')  # test default value
        self.assertEqual(builder.leaf_size, 30)

        builder.set_index_type('linear')
        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        builder.set_index_type('kd_tree',
                               leaf_size=45)  # test user-defined value
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 45)

        builder.set_index_type('linear', leaf_size=37)
        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        builder.set_index_type('KD_TrEe',
                               leaf_size=22)  # test user-defined value
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 22)

        builder.set_index_type('linEAR')
        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        with self.assertRaises(TypeError):
            builder.set_index_type('unsupported_index')

        with self.assertRaises(TypeError):
            builder.set_index_type('kd_tree', -10)

        with self.assertRaises(TypeError):
            builder.set_index_type('kd_tree', 0)

    def test_leaf_size(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        builder.set_index_type('kd_tree',
                               leaf_size=45)  # test user-defined value
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 45)

        builder.leaf_size = 12
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 12)

    def test_set_number_of_neighbors_with_bounds(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.number_of_neighbors, 5)
        (min_value, max_value) = builder.number_of_neighbors_allowed_range()
        self.assertEqual(min_value, 1)
        self.assertEqual(max_value, 1000)

        builder.set_number_of_neighbors_with_bounds(12, allowed_range=(2, 24))
        (min_value, max_value) = builder.number_of_neighbors_allowed_range()
        self.assertEqual(builder.number_of_neighbors, 12)
        self.assertEqual(min_value, 2)
        self.assertEqual(max_value, 24)
        allowed_values = builder.number_of_neighbors_allowed_set()
        self.assertIsNone(allowed_values)

        test_set = {3, 5, 7, 9}
        builder.set_number_of_neighbors_with_bounds(7, allowed_set=test_set)
        self.assertEqual(builder.number_of_neighbors, 7)
        allowed_values = builder.number_of_neighbors_allowed_set()
        self.assertIsNotNone(allowed_values)
        self.assertEqual(allowed_values, test_set)

    def test_set_number_of_neighbors_with_bounds_error_conditions(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(3)

        test_range = (3, 15)
        test_set = {1, 3, 5}
        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(
                3, allowed_range=test_range, allowed_set=test_set)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(3,
                                                        allowed_range=(-5, 5))

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(3,
                                                        allowed_range=(5, 1))

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(
                2, allowed_range=test_range)

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(5,
                                                        allowed_set={5, -3, 7})

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_set=test_set)

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(2,
                                                        allowed_set=[1, 2, 3])

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_range={2, 200})

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(
                4, allowed_range=(2, 10, 20))

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4, allowed_set=set())

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4, allowed_range=[])

    def test_set_number_of_neighbors(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        builder.set_number_of_neighbors_with_bounds(12, allowed_range=(2, 24))
        self.assertEqual(builder.number_of_neighbors, 12)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(1,
                                                        allowed_range=(2, 24))
        builder.set_number_of_neighbors_with_bounds(4, allowed_range=(2, 24))
        self.assertEqual(builder.number_of_neighbors, 4)

        test_set = {3, 5, 7, 9}
        builder.set_number_of_neighbors_with_bounds(7, allowed_set=test_set)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_set=test_set)
        builder.set_number_of_neighbors_with_bounds(5, allowed_set=test_set)
        self.assertEqual(builder.number_of_neighbors, 5)

    def test_add_samples_invalid_data(self):
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        invalid_X = [[1.0, 2.4]]
        with self.assertRaises(TypeError):
            builder.add_samples(invalid_X, self.training_y)

        with self.assertRaises(TypeError):
            builder.add_samples(self.training_X, self.training_y[:3])

        with self.assertRaises(TypeError):
            builder.add_samples([], self.training_y)

        with self.assertRaises(TypeError):
            builder.add_samples(self.training_X, [])

    def test_add_samples_int_labels(self):
        builder = self.create_builder(default_class_label=12)
        self.assertIsNotNone(builder)

        some_X = self.training_X[:10]
        some_y = self.training_y[:10]
        builder.add_samples(some_X, some_y)
        self._validate_samples(builder.spec, some_X, some_y)

        addl_X = self.training_X[10:20]
        addl_y = self.training_y[10:20]
        builder.add_samples(addl_X, addl_y)
        self._validate_samples(builder.spec, self.training_X[:20],
                               self.training_y[:20])

    def test_add_samples_string_labels(self):
        builder = self.create_builder(default_class_label='default')
        self.assertIsNotNone(builder)

        some_X = self.training_X[:3]
        some_y = ['one', 'two', 'three']
        builder.add_samples(some_X, some_y)
        self._validate_samples(builder.spec, some_X, some_y)

        addl_X = self.training_X[3:6]
        addl_y = ['four', 'five', 'six']
        builder.add_samples(addl_X, addl_y)
        self._validate_samples(builder.spec, self.training_X[0:6],
                               some_y + addl_y)

    def test_add_samples_invalid_label_types(self):
        builder_int_labels = self.create_builder(default_class_label=42)
        self.assertIsNotNone(builder_int_labels)

        some_X = self.training_X[:3]
        invalid_int_y = [0, 'one', 2]
        with self.assertRaises(TypeError):
            builder_int_labels.add_samples(some_X, invalid_int_y)

        builder_string_labels = self.create_builder(
            default_class_label='default')
        self.assertIsNotNone(builder_string_labels)

        invalid_string_y = ['zero', 'one', 2]
        with self.assertRaises(TypeError):
            builder_string_labels.add_samples(some_X, invalid_string_y)

    @unittest.skipUnless(is_macos(), 'Only supported on macOS platform.')
    def test_can_init_and_save_model_from_builder_with_updated_spec(self):
        builder = KNearestNeighborsClassifierBuilder(
            input_name='input',
            output_name='output',
            number_of_dimensions=10,
            default_class_label='defaultLabel',
            k=3,
            weighting_scheme='inverse_distance',
            index_type='kd_tree',
            leaf_size=50)
        builder.author = 'CoreML Team'
        builder.license = 'MIT'
        builder.description = 'test_builder_with_validation'

        # Save the updated spec
        coreml_model = MLModel(builder.spec)
        self.assertIsNotNone(coreml_model)
        coreml_model_path = '/tmp/__test_builder_with_validation.mlmodel'

        try:
            coreml_model.save(coreml_model_path)
            self.assertTrue(os.path.isfile(coreml_model_path))
        finally:
            self._delete_mlmodel_and_mlmodelc(coreml_model_path)

    @unittest.skipUnless(is_macos(), 'Only supported on macOS platform.')
    def test_can_init_and_save_model_from_builder_default_parameters(self):
        builder = KNearestNeighborsClassifierBuilder(
            input_name='input',
            output_name='output',
            number_of_dimensions=4,
            default_class_label='defaultLabel')

        # Save the spec built with default parameters
        coreml_model = MLModel(builder.spec)
        self.assertIsNotNone(coreml_model)
        coreml_model_path = '/tmp/__test_builder_default_parameters.mlmodel'

        try:
            coreml_model.save(coreml_model_path)
            self.assertTrue(os.path.isfile(coreml_model_path))
        finally:
            self._delete_mlmodel_and_mlmodelc(coreml_model_path)

    def _validate_samples(self, spec, expected_X, expected_y):
        """Validate the float samples returned from the converted scikit KNeighborsClassifier"""
        num_dimensions = spec.kNearestNeighborsClassifier.nearestNeighborsIndex.numberOfDimensions
        for index, sample in enumerate(spec.kNearestNeighborsClassifier.
                                       nearestNeighborsIndex.floatSamples):
            for dim in range(0, num_dimensions):
                self.assertAlmostEqual(sample.vector[dim],
                                       expected_X[index][dim],
                                       places=6)

        if spec.kNearestNeighborsClassifier.HasField("int64ClassLabels"):
            for index, label in enumerate(
                    spec.kNearestNeighborsClassifier.int64ClassLabels.vector):
                self.assertEqual(label, expected_y[index])

        elif spec.kNearestNeighborsClassifier.HasField("stringClassLabels"):
            for index, label in enumerate(
                    spec.kNearestNeighborsClassifier.stringClassLabels.vector):
                self.assertEqual(label, expected_y[index])

    @staticmethod
    def _delete_mlmodel_and_mlmodelc(path_to_mlmodel):
        """Delete the .mlmodel and .mlmodelc for the given .mlmodel."""
        if os.path.exists(path_to_mlmodel):
            os.remove(path_to_mlmodel)
        path_to_mlmodelc = '{}c'.format(path_to_mlmodel)
        if os.path.exists(path_to_mlmodelc):
            shutil.rmtree(path_to_mlmodelc)
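
# Editor's sketch (not part of the original suite): the builder workflow the
# tests above exercise, end to end. The import paths follow coremltools'
# nearest-neighbors package and are assumptions if your version differs.
def _example_knn_builder_usage():
    from coremltools.models import MLModel
    from coremltools.models.nearest_neighbors import \
        KNearestNeighborsClassifierBuilder

    builder = KNearestNeighborsClassifierBuilder(
        input_name='input',
        output_name='output',
        number_of_dimensions=2,
        default_class_label='default',
        k=3,
        weighting_scheme='inverse_distance',
        index_type='kd_tree',
        leaf_size=30)

    # Samples accumulate across calls; labels must all be int or all be str.
    builder.add_samples([[0.0, 0.0], [1.0, 1.0]], ['zero', 'one'])

    # Constrain k by a range (a discrete allowed_set is the alternative).
    builder.set_number_of_neighbors_with_bounds(3, allowed_range=(1, 10))

    return MLModel(builder.spec)
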
Exemplo n.º 30
0
# Imports assumed by this example (coremltools 3.x-era module paths):
import unittest

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVR

from coremltools.converters.sklearn import convert
from coremltools.models.utils import (evaluate_regressor, is_macos,
                                      macos_version)


class LinearRegressionScikitTest(unittest.TestCase):
    """
    Unit test class for testing scikit-learn converter.
    """
    @classmethod
    def setUpClass(cls):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.linear_model import LinearRegression

        # Note: load_boston was removed in scikit-learn 1.2, so this example
        # assumes an older scikit-learn release.
        scikit_data = load_boston()
        scikit_model = LinearRegression()
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        cls.scikit_data = scikit_data
        cls.scikit_model = scikit_model

    def test_conversion(self):
        input_names = self.scikit_data.feature_names
        spec = convert(self.scikit_model, input_names, 'target').get_spec()
        self.assertIsNotNone(spec)

        # Test the model class
        self.assertIsNotNone(spec.description)

        # Test the interface class
        self.assertEqual(spec.description.predictedFeatureName, 'target')

        # Test the inputs and outputs
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, 'target')
        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'),
                         'doubleType')
        for input_type in spec.description.input:
            self.assertEqual(input_type.type.WhichOneof('Type'), 'doubleType')
        self.assertEqual(sorted(input_names),
                         sorted(map(lambda x: x.name, spec.description.input)))

        # Test the linear regression parameters.
        self.assertTrue(spec.pipelineRegressor.pipeline.models[-1].HasField(
            'glmRegressor'))
        lr = spec.pipelineRegressor.pipeline.models[-1].glmRegressor
        self.assertEqual(lr.offset, self.scikit_model.intercept_)
        self.assertEqual(len(lr.weights), 1)
        self.assertEqual(len(lr.weights[0].value), 13)
        for i, w in enumerate(lr.weights[0].value):
            self.assertAlmostEqual(w, self.scikit_model.coef_[i])
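
        # Editor's note: the scikit-learn converter emits a pipeline spec,
        # which is why these assertions reach through
        # spec.pipelineRegressor.pipeline.models[-1] to find the glmRegressor
        # holding the intercept (offset) and the 13 Boston coefficients.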

    def test_conversion_bad_inputs(self):
        # Error on converting an untrained model
        with self.assertRaises(TypeError):
            model = LinearRegression()
            spec = convert(model, 'data', 'out')

        # Check that the expected model class is enforced during conversion.
        from sklearn.preprocessing import OneHotEncoder
        with self.assertRaises(TypeError):
            model = OneHotEncoder()
            spec = convert(model, 'data', 'out')

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_linear_regression_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for normalize_value in (True, False):
            # `normalize` was removed from LinearRegression in scikit-learn
            # 1.2; as above, this assumes an older release.
            cur_model = LinearRegression(normalize=normalize_value)
            cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            df['prediction'] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics['max_error'], 0)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_linear_svr_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        ARGS = [
            {},
            {'C': 0.5, 'epsilon': 0.25},
            {'dual': False, 'loss': 'squared_epsilon_insensitive'},
            {'tol': 0.005},
            {'fit_intercept': False},
            {'intercept_scaling': 1.5},
        ]

        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for cur_args in ARGS:
            print(cur_args)
            cur_model = LinearSVR(**cur_args)
            cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            df['prediction'] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics['max_error'], 0)
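

# Editor's sketch (not part of the original suite): the convert/evaluate
# pattern this class relies on, isolated on toy data. It mirrors the calls
# above and, like them, needs macOS 10.13+ to run the Core ML predictions.
def _example_sklearn_regressor_roundtrip():
    X = [[0.0, 1.0], [1.0, 0.0], [2.0, 2.0], [3.0, 1.0]]
    y = [1.0, 2.0, 3.0, 4.0]
    model = LinearRegression().fit(X, y)

    # convert() takes the fitted model, the input feature names, and the
    # output feature name, and returns an MLModel.
    mlmodel = convert(model, ['x1', 'x2'], 'target')

    # evaluate_regressor compares Core ML predictions against the
    # scikit-learn predictions in the 'prediction' column; for an exact
    # linear fit, max_error should be ~0.
    df = pd.DataFrame(X, columns=['x1', 'x2'])
    df['prediction'] = model.predict(X)
    return evaluate_regressor(mlmodel, df)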