def test_default_names(self):
    """Convert libsvm classifiers without explicit feature or target names.

    The first conversion uses ``self.libsvm_model`` and is compared against
    libsvm's probability predictions.  The second uses a model trained with
    default parameters and must expose exactly one output named ``target``.
    Core ML predictions are only evaluated on macOS 10.13+.
    """
    df = pd.DataFrame({'input': self.x})

    # Test with probabilities
    spec = libsvm.convert(self.libsvm_model).get_spec()
    if is_macos() and macos_version() >= (10, 13):
        (_, _, probability_lists) = svm_predict(
            self.y, self.x, self.libsvm_model, '-b 1 -q')
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df['classProbability'] = probability_dicts
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False, probabilities='classProbability')
        self.assertLess(metrics['max_probability_error'], 0.00001)

    # Test model without probabilities
    no_probability_model = svmutil.svm_train(self.prob,
                                             svmutil.svm_parameter())
    spec = libsvm.convert(no_probability_model).get_spec()
    self.assertEqual(len(spec.description.output), 1)
    self.assertEqual(spec.description.output[0].name, u'target')
    if is_macos() and macos_version() >= (10, 13):
        (df['prediction'], _, _) = svm_predict(
            self.y, self.x, no_probability_model, ' -q')
        metrics = evaluate_classifier(spec, df, verbose=False)
        self.assertEqual(metrics['num_errors'], 0)
def test_classifier_no_name(self):
    """Convert an LSTM + softmax classifier with custom output names.

    On macOS 10.13+ the converted model is run once and the value stored
    under the requested output name must be a dictionary.
    """
    np.random.seed(1988)
    input_dim = 5
    num_hidden = 12
    num_classes = 6
    input_length = 3

    model = Sequential()
    model.add(
        LSTM(num_hidden,
             input_dim=input_dim,
             input_length=input_length,
             return_sequences=False))
    model.add(Dense(num_classes, activation='softmax'))
    model.set_weights(
        [np.random.rand(*w.shape) for w in model.get_weights()])

    input_names = ['input']
    output_names = ['zzzz']
    class_labels = ['a', 'b', 'c', 'd', 'e', 'f']
    predicted_feature_name = 'pf'
    coremlmodel = keras_converter.convert(
        model,
        input_names,
        output_names,
        class_labels=class_labels,
        predicted_feature_name=predicted_feature_name)

    if is_macos() and macos_version() >= (10, 13):
        inputs = np.random.rand(input_dim)
        outputs = coremlmodel.predict({'input': inputs})
        # this checks that the dictionary got the right name and type
        self.assertIsInstance(outputs[output_names[0]], dict)
def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels):
    """Fit, convert and evaluate LogisticRegression over a grid of options.

    Every combination of the options below (plus the default parameters) is
    fitted on random data generated for ``class_labels`` and converted.  On
    macOS 10.13+ the converted model's class probabilities are compared with
    ``predict_proba`` from scikit-learn.
    """
    options = {
        'C': (0.1, 1., 2.),
        'fit_intercept': (True, False),
        'class_weight': ('balanced', None),
        'solver': ('newton-cg', 'lbfgs', 'liblinear', 'sag')
    }

    # Generate a list of all combinations of options and the default parameters
    product = itertools.product(*options.values())
    args = [{}] + [dict(zip(options.keys(), p)) for p in product]

    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in args:
        print(class_labels, cur_args)
        cur_model = LogisticRegression(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model,
                       input_features=column_names,
                       output_feature_names='target')

        if is_macos() and macos_version() >= (10, 13):
            probability_lists = cur_model.predict_proba(x)
            df['classProbability'] = [
                dict(zip(cur_model.classes_, cur_vals))
                for cur_vals in probability_lists
            ]
            metrics = evaluate_classifier_with_probabilities(
                spec, df, probabilities='classProbability', verbose=False)
            self.assertEqual(metrics['num_key_mismatch'], 0)
            self.assertLess(metrics['max_probability_error'], 0.00001)
def _conversion_and_evaluation_helper_for_linear_svc(self, class_labels):
    """Fit, convert and evaluate LinearSVC for several argument sets.

    Each argument set is fitted on random data generated for
    ``class_labels`` and converted.  On macOS 10.13+ the converted model's
    predictions must match scikit-learn's exactly.
    """
    arg_sets = [
        {},
        {'C': .75, 'loss': 'hinge'},
        {'penalty': 'l1', 'dual': False},
        {'tol': 0.001, 'fit_intercept': False},
        {'intercept_scaling': 1.5}
    ]

    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in arg_sets:
        print(class_labels, cur_args)
        cur_model = LinearSVC(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model,
                       input_features=column_names,
                       output_feature_names='target')

        if is_macos() and macos_version() >= (10, 13):
            df['prediction'] = cur_model.predict(x)
            cur_eval_metrics = evaluate_classifier(spec, df, verbose=False)
            self.assertEqual(cur_eval_metrics['num_errors'], 0)
def _test_prob_model(self, param1, param2):
    """Train a libsvm model with probability estimates and check conversion.

    ``param1`` and ``param2`` are libsvm option strings appended to
    ``self.base_param``.  The libsvm predictions and per-class probability
    dictionaries are written into ``self.df`` and, on macOS 10.13+, compared
    with the converted model's output.
    """
    probability_param = '-b 1'
    df = self.df

    param_str = ' '.join(
        [self.base_param, param1, param2, probability_param])
    param = svm_parameter(param_str)
    model = svm_train(self.prob, param)

    # Get predictions with probabilities as dictionaries
    (df['prediction'], _, probability_lists) = svm_predict(
        self.y, self.x, model, probability_param + ' -q')
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df['probabilities'] = probability_dicts

    spec = libsvm.convert(model, self.column_names, 'target',
                          'probabilities')

    if is_macos() and macos_version() >= (10, 13):
        metrics = evaluate_classifier_with_probabilities(spec, df,
                                                         verbose=False)
        self.assertEqual(metrics['num_key_mismatch'], 0)
        self.assertLess(metrics['max_probability_error'], 0.00001)
def _train_convert_evaluate_assert(self, bt_params=None, **params):
    """Train an XGBRegressor, convert it and evaluate the converted model.

    Parameters
    ----------
    bt_params: dict, optional
        Passed through to ``self._check_metrics``; defaults to an empty
        dict.
    **params
        Keyword arguments forwarded to ``xgboost.XGBRegressor``.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if bt_params is None:
        bt_params = {}

    # Train a model
    xgb_model = xgboost.XGBRegressor(**params)
    xgb_model.fit(self.X, self.target)

    # Convert the model
    spec = xgb_converter.convert(xgb_model,
                                 self.feature_names,
                                 self.output_name,
                                 force_32bit_float=False)

    if is_macos() and macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        df['prediction'] = xgb_model.predict(self.X)

        # Evaluate it
        metrics = evaluate_regressor(spec, df, target='target',
                                     verbose=False)
        self._check_metrics(metrics, bt_params)
def test_int_features_in_pipeline(self):
    """Convert a DictVectorizer + LogisticRegression pipeline keyed by ints.

    The training rows are dictionaries with random integer keys.  On macOS
    10.13+ the converted pipeline must reproduce the scikit-learn
    predictions with no errors.
    """
    import numpy.random as rn
    import pandas as pd
    rn.seed(0)

    x_train_dict = [
        dict((rn.randint(100), 1) for i in range(20)) for j in range(100)
    ]
    y_train = [0, 1] * 50

    from sklearn.pipeline import Pipeline
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.linear_model import LogisticRegression

    pl = Pipeline([("dv", DictVectorizer()), ("lm", LogisticRegression())])
    pl.fit(x_train_dict, y_train)

    import coremltools

    model = coremltools.converters.sklearn.convert(
        pl, input_features="features", output_feature_names="target")

    if is_macos() and macos_version() >= (10, 13):
        x = pd.DataFrame({
            "features": x_train_dict,
            "prediction": pl.predict(x_train_dict)
        })
        cur_eval_metrics = evaluate_classifier(model, x)
        self.assertEqual(cur_eval_metrics['num_errors'], 0)
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made

    Trains libsvm epsilon-SVR models over combinations of kernel and
    non-kernel option strings, converts each one and, on macOS 10.13+,
    checks that the converted model's maximum error is (almost) zero.
    When ``allow_slow`` is false only the first combination is exercised.
    """
    from svm import svm_parameter, svm_problem
    from svmutil import svm_train, svm_predict

    # Generate some smallish (poly kernels take too long on anything else) random data
    x, y = [], []
    for _ in range(50):
        cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
        x.append([cur_x1, cur_x2])
        y.append(1 + 2 * cur_x1 + 3 * cur_x2)

    input_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=input_names)
    prob = svm_problem(y, x)

    # Parameters
    base_param = '-s 3'  # model type is epsilon SVR
    non_kernel_parameters = [
        '', '-c 1.5 -p 0.5 -h 1', '-c 0.5 -p 0.5 -h 0'
    ]
    kernel_parameters = [
        '',
        '-t 2 -g 1.2',  # rbf kernel
        '-t 0',  # linear kernel
        '-t 1',
        '-t 1 -d 2',
        '-t 1 -g 0.75',
        '-t 1 -d 0 -g 0.9 -r 2',  # poly kernel
        '-t 3',
        '-t 3 -g 1.3',
        '-t 3 -r 0.8',
        '-t 3 -r 0.8 -g 0.5'  # sigmoid kernel
    ]

    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            param_str = ' '.join([base_param, param1, param2])
            print(param_str)
            param = svm_parameter(param_str)

            model = svm_train(prob, param)
            (df['prediction'], _, _) = svm_predict(y, x, model)

            spec = libsvm.convert(model,
                                  input_names=input_names,
                                  target_name='target')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_regressor(spec, df)
                self.assertAlmostEqual(metrics['max_error'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def test_internal_layer(self):
    """Expose an intermediate Dense layer as an extra Core ML output.

    A three-layer model is converted and its spec is extended with an
    output named ``middle_layer_output``.  A second two-layer model sharing
    the first two layers' weights is converted separately.  On macOS 10.13+
    the extra output must match the second model's output to two decimal
    places.
    """
    np.random.seed(1988)

    input_dim = 5
    num_channels1 = 10
    num_channels2 = 7
    num_channels3 = 5

    w1 = (np.random.rand(input_dim, num_channels1) - 0.5) / 5.0
    w2 = (np.random.rand(num_channels1, num_channels2) - 0.5) / 5.0
    w3 = (np.random.rand(num_channels2, num_channels3) - 0.5) / 5.0

    b1 = (np.random.rand(num_channels1, ) - 0.5) / 5.0
    b2 = (np.random.rand(num_channels2, ) - 0.5) / 5.0
    b3 = (np.random.rand(num_channels3, ) - 0.5) / 5.0

    model = Sequential()
    model.add(Dense(num_channels1, input_dim=input_dim))
    model.add(Dense(num_channels2, name='middle_layer'))
    model.add(Dense(num_channels3))

    model.set_weights([w1, b1, w2, b2, w3, b3])

    input_names = ['input']
    output_names = ['output']
    coreml1 = keras_converter.convert(model, input_names, output_names)

    # adjust the output parameters of coreml1 to include the intermediate layer
    spec = coreml1.get_spec()
    coremlNewOutputs = spec.description.output.add()
    coremlNewOutputs.name = 'middle_layer_output'
    coremlNewParams = coremlNewOutputs.type.multiArrayType
    coremlNewParams.dataType = coremltools.proto.FeatureTypes_pb2.ArrayFeatureType.ArrayDataType.Value(
        'DOUBLE')
    coremlNewParams.shape.extend([num_channels2])

    coremlfinal = coremltools.models.MLModel(spec)

    # generate a second model that stops at the middle layer
    model2 = Sequential()
    model2.add(Dense(num_channels1, input_dim=input_dim))
    model2.add(Dense(num_channels2))
    model2.set_weights([w1, b1, w2, b2])

    coreml2 = keras_converter.convert(model2, input_names, ['output2'])

    if is_macos() and macos_version() >= (10, 13):
        # generate input data
        inputs = np.random.rand(input_dim)

        fullOutputs = coremlfinal.predict({'input': inputs})
        partialOutput = coreml2.predict({'input': inputs})

        for i in range(0, num_channels2):
            self.assertAlmostEqual(fullOutputs['middle_layer_output'][i],
                                   partialOutput['output2'][i], 2)
def run_case(self, layer_type, input_layer, output_layer, delta=1e-2):
    """Convert, load and evaluate every Caffe net found for ``layer_type``.

    The nets archive for ``layer_type`` is extracted, each net is converted
    and, on macOS 10.13+, loaded and (for nets whose name contains
    ``input``) evaluated.  Names of failing nets are written to
    ``./failed_tests_<layer_type>.json`` and all three failure lists must
    be empty.  The extracted nets directory is removed even when an
    assertion fails.

    Parameters
    ----------
    layer_type: name of the layer family; selects the archive and nets.
    input_layer, output_layer: forwarded to conversion and evaluation.
    delta: forwarded to ``self.evaluate_model``.
    """
    self.maxDiff = None
    extract_tarfile('{}nets/{}.gz'.format(nets_path, layer_type),
                    '{}nets/'.format(nets_path))
    try:
        nets = traverse_caffe_nets(layer_type)
        data_files = traverse_data_files(layer_type)
        failed_tests_load = []
        failed_tests_conversion = []
        failed_tests_evaluation = []

        for counter, net_name_proto in enumerate(nets, 1):
            name_parts = net_name_proto.split("_")
            proto_name = name_parts[0] + "_" + name_parts[1]
            net_data_files = [
                data_file for data_file in data_files
                if proto_name + '_' in data_file
            ]
            net_name = net_name_proto.split(".")[0]
            conversion_result = conversion_to_mlmodel(
                net_name, proto_name, layer_type, input_layer)

            if not (is_macos() and macos_version() >= (10, 13)):
                continue

            if conversion_result is False:
                failed_tests_conversion.append(net_name)
                continue
            load_result = load_mlmodel(net_name, layer_type)
            if load_result is False:
                failed_tests_load.append(net_name)
            if 'input' in net_name:
                # Only the updated failure list is needed here.
                _, failed_tests_evaluation = self.evaluate_model(
                    net_name, layer_type, input_layer, output_layer,
                    net_data_files, failed_tests_evaluation, counter,
                    delta)

        report_path = './failed_tests_{}.json'.format(layer_type)
        with open(report_path, mode='w') as report:
            json.dump(
                {
                    'conversion': failed_tests_conversion,
                    'load': failed_tests_load,
                    'evaluation': failed_tests_evaluation
                }, report)

        self.assertEqual(failed_tests_conversion, [])
        self.assertEqual(failed_tests_load, [])
        self.assertEqual(failed_tests_evaluation, [])
    finally:
        # Clean up the extracted nets whether or not the checks passed.
        shutil.rmtree('{}nets/{}'.format(nets_path, layer_type))
def _test_model(self, input_dict, ref_output_dict, coreml_model):
    """Compare Core ML predictions against reference outputs.

    Every entry of ``ref_output_dict`` is checked, not just the first one.

    Returns
    -------
    (shapes_match, predictions_match): tuple of bool
        ``(False, False)`` as soon as an output's shape differs,
        ``(True, False)`` as soon as an output's values differ, and
        ``(True, True)`` otherwise.  Prediction is skipped, and
        ``(True, True)`` returned, when not running on macOS 10.13+.
    """
    if not (is_macos() and macos_version() >= (10, 13)):
        return True, True

    coreml_out_dict = coreml_model.predict(input_dict, useCPUOnly=True)
    for out_name, ref_out in ref_output_dict.items():
        coreml_out = coreml_out_dict[out_name]
        if not self._compare_shapes(ref_out, coreml_out):
            return False, False
        # NOTE(review): assumes _compare_predictions returns a truthy value
        # on a match -- confirm against its definition.
        if not self._compare_predictions(ref_out, coreml_out):
            return True, False
    return True, True
def _test_conversion(self, data, trained_dict_vectorizer):
    """Convert a fitted DictVectorizer and compare its Core ML output.

    ``data`` is transformed by the vectorizer to obtain the expected rows.
    On macOS 10.13+ the converted model is evaluated on the same rows and
    must report zero errors.
    """
    expected_rows = trained_dict_vectorizer.transform(data)

    converted = sklearn.convert(trained_dict_vectorizer,
                                input_features="features",
                                output_feature_names="output")

    if not is_macos() or macos_version() < (10, 13):
        return

    model_inputs = [{"features": row} for row in data]
    model_outputs = [{"output": x_r} for x_r in expected_rows]
    ret = evaluate_transformer(converted, model_inputs, model_outputs, True)
    assert ret["num_errors"] == 0
def _train_convert_evaluate_assert(self, **scikit_params):
    """Fit a RandomForestClassifier, convert it and check the metrics.

    ``scikit_params`` are forwarded to the classifier (with
    ``random_state=1``) and to ``self._check_metrics``.  Evaluation only
    happens on macOS 10.13+.
    """
    forest = RandomForestClassifier(random_state=1, **scikit_params)
    forest.fit(self.X, self.target)

    # Convert the fitted model to a Core ML spec.
    spec = skl_converter.convert(forest, self.feature_names,
                                 self.output_name)

    if not is_macos() or macos_version() < (10, 13):
        return

    # Build the evaluation frame: features plus scikit-learn's predictions.
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = forest.predict(self.X)

    eval_metrics = evaluate_classifier(spec, frame, verbose=False)
    self._check_metrics(eval_metrics, scikit_params)
def test_pipeline_rename(self):
    """Renaming the input feature must not change the predictions.

    One model is built from the converted spec before the rename and one
    after; on macOS 10.13+ both must predict the same result for the first
    sample of ``self.scikit_data``.
    """
    # Convert
    scikit_spec = converter.convert(self.scikit_model).get_spec()
    model = MLModel(scikit_spec)
    sample_data = self.scikit_data.data[0]

    # Rename
    rename_feature(scikit_spec, 'input', 'renamed_input')
    renamed_model = MLModel(scikit_spec)

    # Check the predictions
    if is_macos() and macos_version() >= (10, 13):
        self.assertEqual(
            model.predict({'input': sample_data}),
            renamed_model.predict({'renamed_input': sample_data}))
def _test_evaluation(self, allow_slow):
    """
    Test that the same predictions are made

    Fits NuSVR models over combinations of kernel and non-kernel
    parameters, converts each one and, on macOS 10.13+, checks that the
    converted model's maximum error is (almost) zero.  When ``allow_slow``
    is false only the first combination is exercised.
    """
    # Generate some smallish (some kernels take too long on anything else) random data
    x, y = [], []
    for _ in range(50):
        cur_x1, cur_x2 = random.gauss(2, 3), random.gauss(-1, 2)
        x.append([cur_x1, cur_x2])
        y.append(1 + 2 * cur_x1 + 3 * cur_x2)

    input_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=input_names)

    # Parameters to test
    kernel_parameters = [
        {},
        {'kernel': 'rbf', 'gamma': 1.2},
        {'kernel': 'linear'},
        {'kernel': 'poly'},
        {'kernel': 'poly', 'degree': 2},
        {'kernel': 'poly', 'gamma': 0.75},
        {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
        {'kernel': 'sigmoid'},
        {'kernel': 'sigmoid', 'gamma': 1.3},
        {'kernel': 'sigmoid', 'coef0': 0.8},
        {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5},
    ]
    non_kernel_parameters = [
        {},
        {'C': 1},
        {'C': 1.5, 'shrinking': True},
        {'C': 0.5, 'shrinking': False, 'nu': 0.9},
    ]

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)

            cur_model = NuSVR(**cur_params)
            cur_model.fit(x, y)
            df['prediction'] = cur_model.predict(x)

            spec = scikit_converter.convert(cur_model, input_names,
                                            'target')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_regressor(spec, df)
                self.assertAlmostEqual(metrics['max_error'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def test_keras_1_image_bias(self):
    """Check per-channel image biases on a converted identity Keras model.

    Keras is fed the image with the biases already added, while the Core ML
    model receives the raw image and applies the biases itself.  On macOS
    10.13+ both predictions must agree.  The scratch directory is removed
    even when an assertion fails.
    """
    # define Keras model and get prediction
    input_shape = (100, 50, 3)
    model = Sequential()
    model.add(Activation('linear', input_shape=input_shape))

    data = np.ones(input_shape)
    keras_input = np.ones(input_shape)

    data[:, :, 0] = 128.0
    data[:, :, 1] = 27.0
    data[:, :, 2] = 200.0
    red_bias = -12.0
    green_bias = -20
    blue_bias = -4
    keras_input[:, :, 0] = data[:, :, 0] + red_bias
    keras_input[:, :, 1] = data[:, :, 1] + green_bias
    keras_input[:, :, 2] = data[:, :, 2] + blue_bias

    keras_preds = model.predict(np.expand_dims(keras_input, axis=0))
    keras_preds = np.transpose(keras_preds, [0, 3, 1, 2]).flatten()

    # convert to coreml and get predictions
    model_dir = tempfile.mkdtemp()
    try:
        model_path = os.path.join(model_dir, 'keras.mlmodel')
        from coremltools.converters import keras as keras_converter
        coreml_model = keras_converter.convert(
            model,
            input_names=['data'],
            output_names=['output'],
            image_input_names=['data'],
            red_bias=red_bias,
            green_bias=green_bias,
            blue_bias=blue_bias)
        # NOTE: the save/reload round trip through model_path is disabled.
        # coreml_model.save(model_path)
        # coreml_model = coremltools.models.MLModel(model_path)

        if is_macos() and macos_version() >= (10, 13):
            coreml_input_dict = dict()
            coreml_input_dict["data"] = PIL.Image.fromarray(
                data.astype(np.uint8))
            coreml_preds = coreml_model.predict(
                coreml_input_dict)['output'].flatten()

            self.assertEqual(len(keras_preds), len(coreml_preds))
            max_relative_error = compare_models(keras_preds, coreml_preds)
            self.assertAlmostEqual(max(max_relative_error, .001), .001,
                                   delta=1e-6)
    finally:
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
def _evaluation_test_helper_no_probability(self, labels, allow_slow):
    """Train libsvm classifiers without probabilities and check conversion.

    Random three-feature data is labelled with values drawn from
    ``labels``.  For each combination of ``self.non_kernel_parameters`` and
    ``self.kernel_parameters`` a model is trained and converted; on macOS
    10.13+ its predictions must match libsvm's.  When ``allow_slow`` is
    false only the first combination is exercised.
    """
    # Generate some random data.
    # This unit test should not rely on scikit learn for test data.
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([
            random.gauss(200, 30),
            random.gauss(-100, 22),
            random.gauss(100, 42)
        ])
        y.append(random.choice(labels))
    # make sure first label is seen first, second is seen second, and so on.
    for i, val in enumerate(labels):
        y[i] = val

    column_names = ['x1', 'x2', 'x3']
    prob = svmutil.svm_problem(y, x)
    df = pd.DataFrame(x, columns=column_names)

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = ' '.join([self.base_param, param1, param2])
            print("PARAMS: ", param_str)
            param = svm_parameter(param_str)

            model = svm_train(prob, param)

            # Get the class predictions from libsvm
            (df['prediction'], _, _) = svm_predict(y, x, model, ' -q')

            spec = libsvm.convert(model, column_names, 'target')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_classifier(spec, df, verbose=False)
                self.assertEqual(metrics['num_errors'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Fit a GradientBoostingClassifier, convert it and evaluate it with CoreML
    """
    booster = GradientBoostingClassifier(random_state=1, **scikit_params)
    booster.fit(self.X, self.target)

    # Convert the fitted model to a Core ML spec.
    spec = skl_converter.convert(booster, self.feature_names,
                                 self.output_name)

    if not is_macos() or macos_version() < (10, 13):
        return

    # Build the evaluation frame: features plus scikit-learn's predictions.
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = booster.predict(self.X)

    eval_metrics = evaluate_classifier(spec, frame)
    self._check_metrics(eval_metrics)
def test_pipeline_rename(self):
    """Renaming the input feature must not change the predictions.

    One model is built from the converted spec before the rename and one
    after; on macOS 10.13+ both must produce the same output keys and
    values for the first sample of ``self.scikit_data``.
    """
    # Convert
    scikit_spec = converter.convert(self.scikit_model).get_spec()
    model = MLModel(scikit_spec)
    sample_data = self.scikit_data.data[0]

    # Rename
    rename_feature(scikit_spec, 'input', 'renamed_input')
    renamed_model = MLModel(scikit_spec)

    # Check the predictions
    if is_macos() and macos_version() >= (10, 13):
        out_dict = model.predict({'input': sample_data})
        out_dict_renamed = renamed_model.predict(
            {'renamed_input': sample_data})
        # assertAlmostEqual cannot subtract lists: on a mismatch it raised
        # TypeError instead of reporting a failure.  assertEqual passes in
        # exactly the same cases and reports a readable diff otherwise.
        self.assertEqual(list(out_dict.keys()),
                         list(out_dict_renamed.keys()))
        self.assertEqual(list(out_dict.values()),
                         list(out_dict_renamed.values()))
def _evaluation_test_helper_with_probability(self, labels, allow_slow):
    """Train libsvm classifiers with probabilities and check conversion.

    For each combination of ``self.non_kernel_parameters`` and
    ``self.kernel_parameters`` a model is trained on ``self.prob`` with
    probability estimates enabled and converted; on macOS 10.13+ the
    converted model's class probabilities must match libsvm's.  When
    ``allow_slow`` is false only the first combination is exercised.
    """
    import copy

    df = pd.DataFrame(self.x, columns=self.column_names)
    # Work on a copy so that self.y is left untouched.
    y = copy.copy(self.y)
    for i, val in enumerate(labels):
        y[i] = val
    probability_param = '-b 1'

    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = ' '.join(
                [self.base_param, param1, param2, probability_param])
            param = svm_parameter(param_str)

            model = svm_train(self.prob, param)

            # Get predictions with probabilities as dictionaries
            (df['prediction'], _, probability_lists) = svm_predict(
                y, self.x, model, probability_param + ' -q')
            probability_dicts = [
                dict(zip([1, 2], cur_vals))
                for cur_vals in probability_lists
            ]
            df['probabilities'] = probability_dicts

            spec = libsvm.convert(model, self.column_names, 'target',
                                  'probabilities')

            if is_macos() and macos_version() >= (10, 13):
                metrics = evaluate_classifier_with_probabilities(
                    spec, df, verbose=False)
                self.assertEqual(metrics['num_key_mismatch'], 0)
                self.assertLess(metrics['max_probability_error'], 0.00001)

            if not allow_slow:
                break

        if not allow_slow:
            break
def test_boston_OHE_plus_normalizer(self):
    """Convert a OneHotEncoder + StandardScaler pipeline on the Boston data.

    On macOS 10.13+ the converted pipeline must reproduce the scikit-learn
    transform with zero errors.
    """
    boston = load_boston()

    encoder = OneHotEncoder(categorical_features=[8], sparse=False)
    steps = [("OHE", encoder), ("Scaler", StandardScaler())]
    pl = Pipeline(steps)
    pl.fit(boston.data, boston.target)

    # Convert the fitted pipeline.
    spec = convert(pl, boston.feature_names, 'out')

    if not is_macos() or macos_version() < (10, 13):
        return

    input_data = [dict(zip(boston.feature_names, row)) for row in boston.data]
    output_data = [{"out": row} for row in pl.transform(boston.data)]

    result = evaluate_transformer(spec, input_data, output_data)
    assert result["num_errors"] == 0
def _train_convert_evaluate_assert(self, **xgboost_params):
    """
    Fit an XGBClassifier, convert it and then evaluate it with CoreML
    """
    booster = xgboost.XGBClassifier(**xgboost_params)
    booster.fit(self.X, self.target)

    # Convert the fitted model in classifier mode.
    spec = xgb_converter.convert(booster,
                                 self.feature_names,
                                 self.output_name,
                                 mode="classifier")

    if not is_macos() or macos_version() < (10, 13):
        return

    # Build the evaluation frame: features plus XGBoost's predictions.
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = booster.predict(self.X)

    eval_metrics = evaluate_classifier(spec, frame)
    self._check_metrics(eval_metrics)
def _train_convert_evaluate_assert(self, **scikit_params):
    """
    Fit a DecisionTreeRegressor, convert it and then evaluate it with CoreML
    """
    tree = DecisionTreeRegressor(random_state=1, **scikit_params)
    tree.fit(self.X, self.target)

    # Convert the fitted model to a Core ML spec.
    spec = skl_converter.convert(tree, self.feature_names,
                                 self.output_name)

    if not is_macos() or macos_version() < (10, 13):
        return

    # Build the evaluation frame: features plus scikit-learn's predictions.
    frame = pd.DataFrame(self.X, columns=self.feature_names)
    frame['prediction'] = tree.predict(self.X)

    eval_metrics = evaluate_regressor(spec, frame, target='target',
                                      verbose=False)
    self._check_metrics(eval_metrics, scikit_params)
def test_boston_OHE_plus_trees(self):
    """Convert a OneHotEncoder + GradientBoostingRegressor pipeline.

    On macOS 10.13+ the converted pipeline's maximum error against the
    scikit-learn predictions on the Boston data must stay below 0.0001.
    """
    boston = load_boston()

    encoder = OneHotEncoder(categorical_features=[8], sparse=False)
    regressor = GradientBoostingRegressor(random_state=1)
    pl = Pipeline([("OHE", encoder), ("Trees", regressor)])
    pl.fit(boston.data, boston.target)

    # Convert the fitted pipeline.
    spec = convert(pl, boston.feature_names, 'target')

    if not is_macos() or macos_version() < (10, 13):
        return

    # Build the evaluation frame: features plus the pipeline's predictions.
    frame = pd.DataFrame(boston.data, columns=boston.feature_names)
    frame['prediction'] = pl.predict(boston.data)

    result = evaluate_regressor(spec, frame, 'target', verbose=False)
    assert result["max_error"] < 0.0001
def test_input_names(self):
    """Check input naming and length handling in the libsvm converter.

    Conversion with default names is evaluated on macOS 10.13+.  Declaring
    one more input than the data has must succeed, while supplying too few
    input names or a too-small input length must raise ``ValueError``.
    """
    data = load_boston()
    df = pd.DataFrame({'input': data['data'].tolist()})

    # Default values
    spec = libsvm.convert(self.libsvm_model)
    if is_macos() and macos_version() >= (10, 13):
        (df['prediction'], _, _) = svmutil.svm_predict(
            data['target'], data['data'].tolist(), self.libsvm_model)
        metrics = evaluate_regressor(spec, df)
        self.assertAlmostEqual(metrics['max_error'], 0)

    # One extra parameter. This is legal/possible, so it must not raise.
    num_inputs = len(data['data'][0])
    libsvm.convert(self.libsvm_model, input_length=num_inputs + 1)

    # Not enough input names.
    input_names = ['this', 'is', 'not', 'enought', 'names']
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_names=input_names)
    with self.assertRaises(ValueError):
        libsvm.convert(self.libsvm_model, input_length=num_inputs - 1)
def create_model(spec): """ Create MLModel with specified types Parameters ---------- spec: Pb spec from 3rd party converted model Returns ------- MLModel """ return coremltools.models.MLModel(spec) @unittest.skipUnless(is_macos() and macos_version() >= (10, 13), 'Only supported on macOS 10.13+') class TestIODataTypes(unittest.TestCase): """ This class tests for different I/O feature data types for an .mlmodel It will cover the following areas to test for: - All features must have a valid type - Multiarrays must have a valid dataType. Inputs must specify shape. Shape must have >= 0 elements - Images must have a valid colorspace. width & height have to be >= 0 - Dictionaries must have a valid key type """ @property def scikit_data(self): return load_boston() def _feature_data_type(self, dtype):
class MLModelTest(unittest.TestCase):
    """Tests for MLModel creation, metadata, renaming and spec utilities.

    All tests share one GLM regressor spec built in ``setUpClass`` with two
    double inputs (weights 1.0 and 2.0, offset 0.1) and one double output.
    Tests that modify the shared spec restore it in a ``finally`` block so
    that a failing test cannot affect the others.
    """

    @classmethod
    def setUpClass(cls):
        spec = Model_pb2.Model()
        spec.specificationVersion = coremltools.SPECIFICATION_VERSION

        features = ['feature_1', 'feature_2']
        output = 'output'
        for f in features:
            input_ = spec.description.input.add()
            input_.name = f
            input_.type.doubleType.MergeFromString(b'')

        output_ = spec.description.output.add()
        output_.name = output
        output_.type.doubleType.MergeFromString(b'')

        lr = spec.glmRegressor
        lr.offset.append(0.1)
        weights = lr.weights.add()
        coefs = [1.0, 2.0]
        for i in coefs:
            weights.value.append(i)

        spec.description.predictedFeatureName = 'output'
        cls.spec = spec

    def _temp_model_path(self):
        """Return a path for a scratch ``.mlmodel`` file.

        The file lives in a private temporary directory that is removed
        when the test finishes.  This replaces ``tempfile.mktemp``, which
        is deprecated because the returned name can be claimed by another
        process before it is used.
        """
        # Function-scope imports keep this helper self-contained.
        import os
        import shutil
        scratch_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, scratch_dir, ignore_errors=True)
        return os.path.join(scratch_dir, 'model.mlmodel')

    def test_model_creation(self):
        model = MLModel(self.spec)
        self.assertIsNotNone(model)

        filename = self._temp_model_path()
        save_spec(self.spec, filename)
        model = MLModel(filename)
        self.assertIsNotNone(model)

    def test_model_api(self):
        model = MLModel(self.spec)
        self.assertIsNotNone(model)

        model.author = 'Test author'
        self.assertEqual(model.author, 'Test author')
        self.assertEqual(model.get_spec().description.metadata.author,
                         'Test author')

        model.license = 'Test license'
        self.assertEqual(model.license, 'Test license')
        self.assertEqual(model.get_spec().description.metadata.license,
                         'Test license')

        model.short_description = 'Test model'
        self.assertEqual(model.short_description, 'Test model')
        self.assertEqual(
            model.get_spec().description.metadata.shortDescription,
            'Test model')

        model.input_description['feature_1'] = 'This is feature 1'
        self.assertEqual(model.input_description['feature_1'],
                         'This is feature 1')

        model.output_description['output'] = 'This is output'
        self.assertEqual(model.output_description['output'],
                         'This is output')

        filename = self._temp_model_path()
        model.save(filename)
        loaded_model = MLModel(filename)

        # Check the reloaded model, so the save/load round trip is verified.
        self.assertEqual(loaded_model.author, 'Test author')
        self.assertEqual(loaded_model.license, 'Test license')
        # NOTE(review): the short_description check was already disabled;
        # the reason is not recorded here -- confirm before re-enabling.
        # self.assertEqual(loaded_model.short_description, 'Test model')
        self.assertEqual(loaded_model.input_description['feature_1'],
                         'This is feature 1')
        self.assertEqual(loaded_model.output_description['output'],
                         'This is output')

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_predict_api(self):
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_input(self):
        rename_feature(self.spec, 'feature_1', 'renamed_feature',
                       rename_inputs=True)
        try:
            model = MLModel(self.spec)
            preds = model.predict({'renamed_feature': 1.0,
                                   'feature_2': 1.0})
            self.assertIsNotNone(preds)
            self.assertEqual(preds['output'], 3.1)
        finally:
            # reset the spec for next run
            rename_feature(self.spec, 'renamed_feature', 'feature_1',
                           rename_inputs=True)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_input_bad(self):
        # 'blah' is not a feature of the spec built in setUpClass.
        rename_feature(self.spec, 'blah', 'bad_name', rename_inputs=True)
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_output(self):
        rename_feature(self.spec, 'output', 'renamed_output',
                       rename_inputs=False, rename_outputs=True)
        try:
            model = MLModel(self.spec)
            preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
            self.assertIsNotNone(preds)
            self.assertEqual(preds['renamed_output'], 3.1)
        finally:
            # reset the spec for next run
            rename_feature(self.spec, 'renamed_output', 'output',
                           rename_inputs=False, rename_outputs=True)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_rename_output_bad(self):
        # 'blah' is not a feature of the spec built in setUpClass.
        rename_feature(self.spec, 'blah', 'bad_name',
                       rename_inputs=False, rename_outputs=True)
        model = MLModel(self.spec)
        preds = model.predict({'feature_1': 1.0, 'feature_2': 1.0})
        self.assertIsNotNone(preds)
        self.assertEqual(preds['output'], 3.1)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_future_version(self):
        self.spec.specificationVersion = 10000
        try:
            filename = self._temp_model_path()
            save_spec(self.spec, filename,
                      auto_set_specification_version=False)
            model = MLModel(filename)
            # this model should exist, but throw an exception when we try to
            # use predict because the engine doesn't support this model
            # version
            self.assertIsNotNone(model)
            with self.assertRaises(Exception) as raised:
                model.predict({})
            # Checked outside the ``with`` block: an assertion failing
            # inside it would itself be accepted by assertRaises(Exception).
            self.assertIn('Core ML model specification version',
                          str(raised.exception))
        finally:
            self.spec.specificationVersion = 1

    @unittest.skipUnless(is_macos() and macos_version() < (10, 13),
                         'Only supported on macOS 10.13-')
    def test_MLModel_warning(self):
        import warnings
        self.spec.specificationVersion = 3
        try:
            with warnings.catch_warnings(record=True) as w:
                # Cause all warnings to always be triggered.
                warnings.simplefilter("always")
                MLModel(self.spec)
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, RuntimeWarning))
                self.assertIn("not able to run predict()",
                              str(w[-1].message))
        finally:
            self.spec.specificationVersion = 1
        # Building the model from the restored spec must still work.
        MLModel(self.spec)

    def test_convert_nn_spec_to_half_precision(self):
        # simple network with quantization layer
        input_features = [('data', datatypes.Array(3))]
        output_features = [('out', datatypes.Array(3))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        weights = np.random.uniform(-0.5, 0.5, (3, 3))
        builder.add_inner_product(name='inner_product',
                                  W=weights,
                                  b=None,
                                  input_channels=3,
                                  output_channels=3,
                                  has_bias=False,
                                  input_name='data',
                                  output_name='out')
        model = MLModel(builder.spec)
        spec = convert_neural_network_spec_weights_to_fp16(model.get_spec())
        self.assertIsNotNone(spec)

        # simple network without quantization layer
        input_features = [('data', datatypes.Array(3))]
        output_features = [('out', datatypes.Array(3))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        builder.add_lrn(name='lrn',
                        input_name='data',
                        output_name='out',
                        alpha=2,
                        beta=3,
                        local_size=1,
                        k=8)
        model = MLModel(builder.spec)
        spec = convert_neural_network_spec_weights_to_fp16(model.get_spec())
        self.assertIsNotNone(spec)

    # unittest.skip expects a reason string; used bare it can turn the test
    # into a silent no-op on older Python versions.
    @unittest.skip('Specification version downgrade checks are disabled')
    def test_downgrade_specification_version(self):
        # manually set an invalid specification version
        self.spec.specificationVersion = -1
        model = MLModel(self.spec)
        self.assertEqual(model.get_spec().specificationVersion, 1)

        # manually set a high specification version
        self.spec.specificationVersion = 4
        filename = self._temp_model_path()
        save_spec(self.spec, filename, auto_set_specification_version=True)
        model = MLModel(filename)
        self.assertEqual(model.get_spec().specificationVersion, 1)

        # simple neural network with only spec 1 layer
        input_features = [('data', datatypes.Array(3))]
        output_features = [('out', datatypes.Array(3))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        builder.add_activation('relu', 'RELU', 'data', 'out')
        # set a high specification version
        builder.spec.specificationVersion = 3
        model = MLModel(builder.spec)
        filename = self._temp_model_path()
        model.save(filename)
        # load the model back
        model = MLModel(filename)
        self.assertEqual(model.get_spec().specificationVersion, 1)

        # test save without automatic set specification version
        self.spec.specificationVersion = 3
        filename = self._temp_model_path()
        save_spec(self.spec, filename,
                  auto_set_specification_version=False)
        model = MLModel(filename)
        # the specification version should be original
        self.assertEqual(model.get_spec().specificationVersion, 3)

    def test_multiarray_type_convert_to_float(self):
        input_features = [('data', datatypes.Array(2))]
        output_features = [('out', datatypes.Array(2))]
        builder = NeuralNetworkBuilder(input_features, output_features)
        builder.add_ceil('ceil', 'data', 'out')
        spec = builder.spec
        self.assertEqual(
            spec.description.input[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.DOUBLE)
        self.assertEqual(
            spec.description.output[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.DOUBLE)
        convert_double_to_float_multiarray_type(spec)
        self.assertEqual(
            spec.description.input[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.FLOAT32)
        self.assertEqual(
            spec.description.output[0].type.multiArrayType.dataType,
            Model_pb2.ArrayFeatureType.FLOAT32)
def _test_rnn_layer(self, keras_major_version, limit=None):
    """Convert SimpleRNN models over many parameter sets and compare outputs.

    Every combination of ``self.base_layer_params`` and
    ``self.rnn_layer_params`` is shuffled, filtered through ``valid_params``
    and (optionally) truncated to ``limit`` entries.  For each remaining
    combination a one-layer Keras model is built and converted; on
    macOS 10.13+ the Keras and Core ML predictions are compared.

    Args:
        keras_major_version: when equal to 2 the layer is built with the
            ``dropout`` / ``recurrent_dropout`` / ``*_regularizer`` keyword
            names; any other value uses the ``dropout_U`` / ``dropout_W`` /
            ``W_regularizer`` ... names.
        limit: upper bound on the number of parameter combinations to run
            (``params[:limit]``); ``None`` runs all of them.

    Raises:
        AssertionError: at the end of the run, if any combination produced a
            shape mismatch or a relative error that is not ~0 to 2 places.
    """
    numerical_err_models = []
    shape_err_models = []

    base_keys = list(self.params_dict.keys())
    rnn_keys = list(self.simple_rnn_params_dict.keys())

    params = list(
        itertools.product(self.base_layer_params, self.rnn_layer_params))
    np.random.shuffle(params)
    params = [
        param for param in params
        if valid_params(dict(zip(base_keys, param[0])))
    ]

    for base_values, rnn_values in params[:limit]:
        base_params = dict(zip(base_keys, base_values))
        rnn_params = dict(zip(rnn_keys, rnn_values))
        input_dims = base_params['input_dims']
        input_data = generate_input(input_dims[0], input_dims[1],
                                    input_dims[2])

        model = Sequential()
        settings = dict(
            activation=base_params['activation'],
            return_sequences=base_params['return_sequences'],
            go_backwards=base_params['go_backwards'],
            unroll=base_params['unroll'],
        )
        dropout = rnn_params['dropout']
        regularizer = rnn_params['regularizer']
        if keras_major_version == 2:
            # NOTE(review): 'dropout_U' feeds ``dropout`` and 'dropout_W'
            # feeds ``recurrent_dropout``; this looks swapped relative to
            # the Keras 1 -> 2 renaming -- confirm. Kept as in the original.
            model.add(
                SimpleRNN(
                    base_params['output_dim'],
                    input_shape=input_dims[1:],
                    dropout=dropout['dropout_U'],
                    recurrent_dropout=dropout['dropout_W'],
                    kernel_regularizer=regularizer['W_regularizer'],
                    recurrent_regularizer=regularizer['U_regularizer'],
                    bias_regularizer=regularizer['b_regularizer'],
                    **settings))
        else:
            model.add(
                SimpleRNN(base_params['output_dim'],
                          input_length=input_dims[1],
                          input_dim=input_dims[2],
                          dropout_U=dropout['dropout_U'],
                          dropout_W=dropout['dropout_W'],
                          W_regularizer=regularizer['W_regularizer'],
                          U_regularizer=regularizer['U_regularizer'],
                          b_regularizer=regularizer['b_regularizer'],
                          **settings))

        model_dir = tempfile.mkdtemp()
        # try/finally guarantees the scratch directory is removed on every
        # path: success, recorded mismatch, or an unexpected exception from
        # conversion/prediction.
        try:
            keras_model_path = os.path.join(model_dir, 'keras.h5')
            coreml_model_path = os.path.join(model_dir, 'keras.mlmodel')
            model.save_weights(keras_model_path)
            mlkitmodel = _get_mlkit_model_from_path(model, coreml_model_path)

            # Predictions are only compared on macOS 10.13+; elsewhere the
            # test stops after the conversion step.
            if not (is_macos() and macos_version() >= (10, 13)):
                continue

            keras_preds = model.predict(input_data).flatten()
            # Swap the first two axes before handing the data to Core ML.
            input_data = np.transpose(input_data, [1, 0, 2])
            coreml_preds = mlkitmodel.predict({'data': input_data
                                               })['output'].flatten()

            if coreml_preds.shape != keras_preds.shape:
                print(
                    "Shape error:\nbase_params: {}\nkeras_preds.shape: {}\ncoreml_preds.shape: {}"
                    .format(base_params, keras_preds.shape,
                            coreml_preds.shape))
                shape_err_models.append(base_params)
                continue

            try:
                for coreml_value, keras_value in zip(coreml_preds,
                                                     keras_preds):
                    relative_error = (coreml_value -
                                      keras_value) / coreml_value
                    self.assertAlmostEqual(relative_error, 0, places=2)
            except AssertionError:
                # Record the failure and keep going so that one run reports
                # every failing parameter set.
                print(
                    "Assertion error:\nbase_params: {}\nkeras_preds: {}\ncoreml_preds: {}"
                    .format(base_params, keras_preds, coreml_preds))
                numerical_err_models.append(base_params)
        finally:
            shutil.rmtree(model_dir)

    self.assertEqual(shape_err_models, [],
                     msg='Shape error models {}'.format(shape_err_models))
    self.assertEqual(
        numerical_err_models, [],
        msg='Numerical error models {}'.format(numerical_err_models))
class NearestNeighborsBuilderTest(unittest.TestCase):
    """
    Unit tests for the nearest neighbors builder class.
    """

    def setUp(self):
        """Load the iris dataset and keep its last 30 rows as training data."""
        iris_samples = load_iris()
        self.iris_X = iris_samples.data
        self.iris_y = iris_samples.target
        self.training_X = self.iris_X[-30:]
        self.training_y = self.iris_y[-30:]

    def tearDown(self):
        # Do any cleanup here
        pass

    def create_builder(self, default_class_label='default_label'):
        """Return a 4-dimensional builder with the given default class label."""
        builder = KNearestNeighborsClassifierBuilder(
            input_name='input',
            output_name='output',
            number_of_dimensions=4,
            default_class_label=default_class_label)
        return builder

    def test_builder_output_types(self):
        """The label field of the spec follows the default label's type."""
        builder = self.create_builder(default_class_label='default')
        self.assertIsNotNone(builder)
        self.assertTrue(
            builder.spec.kNearestNeighborsClassifier.HasField(
                "stringClassLabels"))

        builder = self.create_builder(default_class_label=12)
        self.assertIsNotNone(builder)
        self.assertTrue(
            builder.spec.kNearestNeighborsClassifier.HasField(
                "int64ClassLabels"))

        # A float default label is rejected.
        bad_default_label = float(21.32)
        with self.assertRaises(TypeError):
            self.create_builder(default_class_label=bad_default_label)

    def test_builder_training_input(self):
        """The spec declares 'input' and 'output' as its training inputs."""
        builder = self.create_builder(default_class_label='default')
        self.assertIsNotNone(builder)
        self.assertTrue(
            builder.spec.kNearestNeighborsClassifier.HasField(
                "stringClassLabels"))

        training_input = builder.spec.description.trainingInput
        self.assertEqual(training_input[0].name, 'input')
        self.assertEqual(training_input[0].type.WhichOneof('Type'),
                         'multiArrayType')
        self.assertEqual(training_input[1].name, 'output')
        self.assertEqual(training_input[1].type.WhichOneof('Type'),
                         'stringType')

    def test_make_updatable(self):
        """``is_updatable`` is True by default and is mirrored in the spec."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertTrue(builder.spec.isUpdatable)
        builder.is_updatable = False
        self.assertFalse(builder.spec.isUpdatable)
        builder.is_updatable = True
        self.assertTrue(builder.spec.isUpdatable)

    def test_author(self):
        """``author`` starts empty and is written to the spec metadata."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.spec.description.metadata.author, '')
        builder.author = 'John Doe'
        self.assertEqual(builder.author, 'John Doe')
        self.assertEqual(builder.spec.description.metadata.author, 'John Doe')

    def test_description(self):
        """``description`` starts empty and is written to the spec metadata."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.spec.description.metadata.shortDescription,
                         '')
        builder.description = 'This is a description'
        self.assertEqual(builder.description, 'This is a description')
        self.assertEqual(builder.spec.description.metadata.shortDescription,
                         'This is a description')

    def test_weighting_scheme(self):
        """Weighting scheme names are case-insensitive; unknown names fail."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        builder.weighting_scheme = 'uniform'
        self.assertEqual(builder.weighting_scheme, 'uniform')

        builder.weighting_scheme = 'inverse_distance'
        self.assertEqual(builder.weighting_scheme, 'inverse_distance')

        builder.weighting_scheme = 'unIfOrM'
        self.assertEqual(builder.weighting_scheme, 'uniform')

        builder.weighting_scheme = 'InVerSE_DISTance'
        self.assertEqual(builder.weighting_scheme, 'inverse_distance')

        with self.assertRaises(TypeError):
            builder.weighting_scheme = 'test'

    def test_index_type(self):
        """Index type / leaf size combinations and their rejected values."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        builder.set_index_type('kd_tree')
        self.assertEqual(builder.index_type, 'kd_tree')  # test default value
        self.assertEqual(builder.leaf_size, 30)

        builder.set_index_type('linear')
        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        builder.set_index_type('kd_tree',
                               leaf_size=45)  # test user-defined value
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 45)

        # A leaf size passed together with 'linear' is not retained.
        builder.set_index_type('linear', leaf_size=37)
        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        builder.set_index_type('KD_TrEe',
                               leaf_size=22)  # test user-defined value
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 22)

        builder.set_index_type('linEAR')
        self.assertEqual(builder.index_type, 'linear')
        self.assertEqual(builder.leaf_size, 0)

        with self.assertRaises(TypeError):
            builder.set_index_type('unsupported_index')

        with self.assertRaises(TypeError):
            builder.set_index_type('kd_tree', -10)

        with self.assertRaises(TypeError):
            builder.set_index_type('kd_tree', 0)

    def test_leaf_size(self):
        """``leaf_size`` can be changed without changing the index type."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        builder.set_index_type('kd_tree',
                               leaf_size=45)  # test user-defined value
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 45)

        builder.leaf_size = 12
        self.assertEqual(builder.index_type, 'kd_tree')
        self.assertEqual(builder.leaf_size, 12)

    def test_set_number_of_neighbors_with_bounds(self):
        """Default k and bounds, then a custom range, then a custom set."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        self.assertEqual(builder.number_of_neighbors, 5)
        (min_value, max_value) = builder.number_of_neighbors_allowed_range()
        self.assertEqual(min_value, 1)
        self.assertEqual(max_value, 1000)

        builder.set_number_of_neighbors_with_bounds(12, allowed_range=(2, 24))
        (min_value, max_value) = builder.number_of_neighbors_allowed_range()
        self.assertEqual(builder.number_of_neighbors, 12)
        self.assertEqual(min_value, 2)
        self.assertEqual(max_value, 24)
        # With a range in effect no allowed set is reported.
        allowed_values = builder.number_of_neighbors_allowed_set()
        self.assertIsNone(allowed_values)

        test_set = {3, 5, 7, 9}
        builder.set_number_of_neighbors_with_bounds(7, allowed_set=test_set)
        self.assertEqual(builder.number_of_neighbors, 7)
        allowed_values = builder.number_of_neighbors_allowed_set()
        self.assertIsNotNone(allowed_values)
        self.assertEqual(allowed_values, test_set)

    def test_set_number_of_neighbors_with_bounds_error_conditions(self):
        """Invalid bound specifications raise ValueError or TypeError.

        NOTE(review): two cases appear twice in this sequence. They are kept
        in their original positions in case the order of calls on the shared
        builder matters -- confirm and de-duplicate if it does not.
        """
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        # Neither a range nor a set.
        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(3)

        test_range = (3, 15)
        test_set = {1, 3, 5}
        # Both a range and a set.
        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(
                3, allowed_range=test_range, allowed_set=test_set)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(3,
                                                        allowed_range=(-5, 5))

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(3,
                                                        allowed_range=(5, 1))

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(
                3, allowed_range=test_range, allowed_set=test_set)

        # Value outside the range.
        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(
                2, allowed_range=test_range)

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(5,
                                                        allowed_set={5, -3, 7})

        # Value not in the set.
        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_set=test_set)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_set=test_set)

        # Wrong container types / shapes.
        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(2,
                                                        allowed_set=[1, 2, 3])

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_range={2, 200})

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_range=(2, 10,
                                                                       20))

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4, allowed_set=set())

        with self.assertRaises(TypeError):
            builder.set_number_of_neighbors_with_bounds(4, allowed_range=[])

    def test_set_number_of_neighbors(self):
        """Values are accepted or rejected according to the active bounds."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        builder.set_number_of_neighbors_with_bounds(12, allowed_range=(2, 24))
        self.assertEqual(builder.number_of_neighbors, 12)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(1,
                                                        allowed_range=(2, 24))
        builder.set_number_of_neighbors_with_bounds(4, allowed_range=(2, 24))
        self.assertEqual(builder.number_of_neighbors, 4)

        test_set = {3, 5, 7, 9}
        builder.set_number_of_neighbors_with_bounds(7, allowed_set=test_set)

        with self.assertRaises(ValueError):
            builder.set_number_of_neighbors_with_bounds(4,
                                                        allowed_set=test_set)
        builder.set_number_of_neighbors_with_bounds(5, allowed_set=test_set)
        self.assertEqual(builder.number_of_neighbors, 5)

    def test_add_samples_invalid_data(self):
        """Malformed sample / label inputs raise TypeError."""
        builder = self.create_builder()
        self.assertIsNotNone(builder)

        # Two features per row while the builder was created with four.
        invalid_X = [[1.0, 2.4]]
        with self.assertRaises(TypeError):
            builder.add_samples(invalid_X, self.training_y)

        # Fewer labels than samples.
        with self.assertRaises(TypeError):
            builder.add_samples(self.training_X, self.training_y[:3])

        with self.assertRaises(TypeError):
            builder.add_samples([], self.training_y)

        with self.assertRaises(TypeError):
            builder.add_samples(self.training_X, [])

    def test_add_samples_int_labels(self):
        """Integer-labelled samples are stored across successive calls."""
        builder = self.create_builder(default_class_label=12)
        self.assertIsNotNone(builder)

        some_X = self.training_X[:10]
        some_y = self.training_y[:10]
        builder.add_samples(some_X, some_y)
        self._validate_samples(builder.spec, some_X, some_y)

        addl_X = self.training_X[10:20]
        addl_y = self.training_y[10:20]
        builder.add_samples(addl_X, addl_y)
        self._validate_samples(builder.spec, self.training_X[:20],
                               self.training_y[:20])

    def test_add_samples_string_labels(self):
        """String-labelled samples are stored across successive calls."""
        builder = self.create_builder(default_class_label='default')
        self.assertIsNotNone(builder)

        some_X = self.training_X[:3]
        some_y = ['one', 'two', 'three']
        builder.add_samples(some_X, some_y)
        self._validate_samples(builder.spec, some_X, some_y)

        addl_X = self.training_X[3:6]
        addl_y = ['four', 'five', 'six']
        builder.add_samples(addl_X, addl_y)
        self._validate_samples(builder.spec, self.training_X[0:6],
                               some_y + addl_y)

    def test_add_samples_invalid_label_types(self):
        """Labels whose types are mixed are rejected with TypeError."""
        builder_int_labels = self.create_builder(default_class_label=42)
        self.assertIsNotNone(builder_int_labels)

        some_X = self.training_X[:3]
        invalid_int_y = [0, 'one', 2]
        with self.assertRaises(TypeError):
            builder_int_labels.add_samples(some_X, invalid_int_y)

        builder_string_labels = self.create_builder(
            default_class_label='default')
        self.assertIsNotNone(builder_string_labels)

        invalid_string_y = ['zero', 'one', 2]
        with self.assertRaises(TypeError):
            builder_string_labels.add_samples(some_X, invalid_string_y)

    @unittest.skipUnless(is_macos(), 'Only supported on MacOS platform.')
    def test_can_init_and_save_model_from_builder_with_updated_spec(self):
        """A fully configured builder yields a spec that can be saved."""
        builder = KNearestNeighborsClassifierBuilder(
            input_name='input',
            output_name='output',
            number_of_dimensions=10,
            default_class_label='defaultLabel',
            k=3,
            weighting_scheme='inverse_distance',
            index_type='kd_tree',
            leaf_size=50)
        builder.author = 'CoreML Team'
        builder.license = 'MIT'
        builder.description = 'test_builder_with_validation'

        # Save the updated spec
        coreml_model = MLModel(builder.spec)
        self.assertIsNotNone(coreml_model)
        self._assert_model_can_be_saved(coreml_model)

    @unittest.skipUnless(is_macos(), 'Only supported on MacOS platform.')
    def test_can_init_and_save_model_from_builder_default_parameters(self):
        """A builder with default parameters yields a spec that can be saved."""
        builder = KNearestNeighborsClassifierBuilder(
            input_name='input',
            output_name='output',
            number_of_dimensions=4,
            default_class_label='defaultLabel')

        # Save the updated spec
        coreml_model = MLModel(builder.spec)
        self.assertIsNotNone(coreml_model)
        self._assert_model_can_be_saved(coreml_model)

    def _assert_model_can_be_saved(self, coreml_model):
        """Save ``coreml_model`` into a private temp directory and check it.

        A fresh directory per call replaces the fixed, shared
        ``/tmp/__test_builder_with_validation.mlmodel`` path, so the two save
        tests (or concurrent runs) cannot collide on the same file. The
        directory is removed afterwards whether or not the assertions pass.
        """
        # Local import: only ``os`` and ``shutil`` are known to be imported at
        # module level.
        import tempfile

        model_dir = tempfile.mkdtemp()
        coreml_model_path = os.path.join(
            model_dir, '__test_builder_with_validation.mlmodel')
        try:
            coreml_model.save(coreml_model_path)
            self.assertTrue(os.path.isfile(coreml_model_path))
        finally:
            self._delete_mlmodel_and_mlmodelc(coreml_model_path)
            shutil.rmtree(model_dir, ignore_errors=True)

    def _validate_samples(self, spec, expected_X, expected_y):
        """Validate the float samples and class labels stored in ``spec``.

        The number of stored samples (and labels) must equal the number of
        expected ones; without that check a spec holding fewer entries than
        expected would pass unnoticed. Each sample is then compared
        feature-by-feature to 6 decimal places, and each label for equality.
        """
        knn = spec.kNearestNeighborsClassifier
        num_dimensions = knn.nearestNeighborsIndex.numberOfDimensions

        samples = knn.nearestNeighborsIndex.floatSamples
        self.assertEqual(len(samples), len(expected_X))
        for sample, expected_row in zip(samples, expected_X):
            for dim in range(0, num_dimensions):
                self.assertAlmostEqual(sample.vector[dim],
                                       expected_row[dim],
                                       places=6)

        if knn.HasField("int64ClassLabels"):
            labels = knn.int64ClassLabels.vector
        elif knn.HasField("stringClassLabels"):
            labels = knn.stringClassLabels.vector
        else:
            # Neither label field is set: as before, no labels are checked.
            return

        self.assertEqual(len(labels), len(expected_y))
        for label, expected_label in zip(labels, expected_y):
            self.assertEqual(label, expected_label)

    @staticmethod
    def _delete_mlmodel_and_mlmodelc(path_to_mlmodel):
        """Delete the .mlmodel and .mlmodelc for the given .mlmodel."""
        if os.path.exists(path_to_mlmodel):
            os.remove(path_to_mlmodel)
        # The compiled model path is the .mlmodel path with a trailing 'c'
        # and is removed as a directory tree.
        path_to_mlmodelc = '{}c'.format(path_to_mlmodel)
        if os.path.exists(path_to_mlmodelc):
            shutil.rmtree(path_to_mlmodelc)
class LinearRegressionScikitTest(unittest.TestCase):
    """
    Unit test class for testing scikit-learn converter.
    """

    @classmethod
    def setUpClass(cls):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.linear_model import LinearRegression

        scikit_data = load_boston()
        scikit_model = LinearRegression()
        scikit_model.fit(scikit_data['data'], scikit_data['target'])

        # Save the data and the model
        cls.scikit_data = scikit_data
        cls.scikit_model = scikit_model

    def test_conversion(self):
        """Convert the fitted model and inspect the resulting spec."""
        input_names = self.scikit_data.feature_names
        spec = convert(self.scikit_model, input_names, 'target').get_spec()
        self.assertIsNotNone(spec)

        # Test the model class
        self.assertIsNotNone(spec.description)

        # Test the interface class
        self.assertEqual(spec.description.predictedFeatureName, 'target')

        # Test the inputs and outputs
        self.assertEqual(len(spec.description.output), 1)
        self.assertEqual(spec.description.output[0].name, 'target')
        self.assertEqual(spec.description.output[0].type.WhichOneof('Type'),
                         'doubleType')
        for input_type in spec.description.input:
            self.assertEqual(input_type.type.WhichOneof('Type'),
                             'doubleType')
        self.assertEqual(
            sorted(input_names),
            sorted(feature.name for feature in spec.description.input))

        # Test the linear regression parameters.
        last_model = spec.pipelineRegressor.pipeline.models[-1]
        self.assertTrue(last_model.HasField('glmRegressor'))
        lr = last_model.glmRegressor
        self.assertEqual(lr.offset, self.scikit_model.intercept_)
        self.assertEqual(len(lr.weights), 1)
        self.assertEqual(len(lr.weights[0].value), 13)
        # The length was asserted above, so pairing the weights with the
        # scikit-learn coefficients covers all 13 of them.
        for weight, coefficient in zip(lr.weights[0].value,
                                       self.scikit_model.coef_):
            self.assertAlmostEqual(weight, coefficient)

    def test_conversion_bad_inputs(self):
        """Conversion raises TypeError for unsupported or untrained models."""
        # Error on converting an untrained model
        with self.assertRaises(TypeError):
            model = LinearRegression()
            convert(model, 'data', 'out')

        # Check the expected class during conversion.
        from sklearn.preprocessing import OneHotEncoder
        with self.assertRaises(TypeError):
            model = OneHotEncoder()
            convert(model, 'data', 'out')

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_linear_regression_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for normalize_value in (True, False):
            cur_model = LinearRegression(normalize=normalize_value)
            cur_model.fit(self.scikit_data['data'],
                          self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            # The scikit-learn predictions are the reference that
            # evaluate_regressor compares the converted model against.
            df['prediction'] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics['max_error'], 0)

    @unittest.skipUnless(is_macos() and macos_version() >= (10, 13),
                         'Only supported on macOS 10.13+')
    def test_linear_svr_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        # Keyword-argument sets for LinearSVR; the empty dict exercises the
        # defaults.
        ARGS = [{}, {
            'C': 0.5,
            'epsilon': 0.25
        }, {
            'dual': False,
            'loss': 'squared_epsilon_insensitive'
        }, {
            'tol': 0.005
        }, {
            'fit_intercept': False
        }, {
            'intercept_scaling': 1.5
        }]

        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for cur_args in ARGS:
            print(cur_args)
            cur_model = LinearSVR(**cur_args)
            cur_model.fit(self.scikit_data['data'],
                          self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            df['prediction'] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            self.assertAlmostEqual(metrics['max_error'], 0)