def test_default_names(self):
    df = pd.DataFrame({'input': self.x})

    # Test with probabilities
    spec = libsvm.convert(self.libsvm_model).get_spec()
    (_, _, probability_lists) = svm_predict(
        self.y, self.x, self.libsvm_model, '-b 1 -q')
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df['classProbability'] = probability_dicts
    metrics = evaluate_classifier_with_probabilities(
        spec, df, verbose=False, probabilities='classProbability')
    self.assertLess(metrics['max_probability_error'], 0.00001)

    # Test model without probabilities
    no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter())
    spec = libsvm.convert(no_probability_model).get_spec()
    self.assertEqual(len(spec.description.output), 1)
    self.assertEqual(spec.description.output[0].name, u'target')
    (df['prediction'], _, _) = svm_predict(
        self.y, self.x, no_probability_model, ' -q')
    metrics = evaluate_classifier(spec, df, verbose=False)
    self.assertEqual(metrics['num_errors'], 0)
def test_default_names(self):
    df = pd.DataFrame({"input": self.x})
    df["input"] = df["input"].apply(np.array)

    # Test with probabilities
    spec = libsvm.convert(self.libsvm_model).get_spec()
    if _is_macos() and _macos_version() >= (10, 13):
        (_, _, probability_lists) = svm_predict(
            self.y, self.x, self.libsvm_model, "-b 1 -q"
        )
        probability_dicts = [
            dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
        ]
        df["classProbability"] = probability_dicts
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False, probabilities="classProbability"
        )
        self.assertLess(metrics["max_probability_error"], 0.00001)

    # Test model without probabilities
    no_probability_model = svmutil.svm_train(self.prob, svmutil.svm_parameter())
    spec = libsvm.convert(no_probability_model).get_spec()
    self.assertEqual(len(spec.description.output), 1)
    self.assertEqual(spec.description.output[0].name, u"target")
    if _is_macos() and _macos_version() >= (10, 13):
        (df["prediction"], _, _) = svm_predict(
            self.y, self.x, no_probability_model, " -q"
        )
        metrics = evaluate_classifier(spec, df, verbose=False)
        self.assertEqual(metrics["num_errors"], 0)
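# The two test_default_names variants above assume fixtures and helpers that are
# not shown here. A minimal sketch, assuming the coremltools evaluation utilities
# and the classic libsvm Python bindings (import paths vary across libsvm
# releases; self.x, self.y, self.prob, and self.libsvm_model are hypothetical
# reconstructions from how the tests use them):
import numpy as np
import pandas as pd

import svmutil  # classic bindings; newer releases use `from libsvm import svmutil`
from svmutil import svm_predict

from coremltools.converters import libsvm
from coremltools.models.utils import (
    _is_macos,
    _macos_version,
    evaluate_classifier,
    evaluate_classifier_with_probabilities,
)

def _example_setup(self):
    # Hypothetical setUp: a tiny, linearly separable binary problem, trained
    # with "-b 1" so svm_predict can also return per-class probabilities.
    self.x = [[1.0, 2.0], [2.0, 1.0], [-1.0, -2.0], [-2.0, -1.0]]
    self.y = [1, 1, 2, 2]
    self.prob = svmutil.svm_problem(self.y, self.x)
    self.libsvm_model = svmutil.svm_train(self.prob, svmutil.svm_parameter("-b 1 -q"))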
def _evaluation_test_helper_with_probability(self, labels, allow_slow):
    import copy

    df = pd.DataFrame(self.x, columns=self.column_names)
    y = copy.copy(self.y)
    for i, val in enumerate(labels):
        y[i] = val

    probability_param = '-b 1'
    for param1 in self.non_kernel_parameters:
        for param2 in self.kernel_parameters:
            param_str = ' '.join(
                [self.base_param, param1, param2, probability_param])
            # print("PARAMS: ", param_str)
            param = svm_parameter(param_str)
            model = svm_train(self.prob, param)

            # Get predictions with probabilities as dictionaries
            (df['prediction'], _, probability_lists) = svm_predict(
                y, self.x, model, probability_param + ' -q')
            probability_dicts = [
                dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
            ]
            df['probabilities'] = probability_dicts

            spec = libsvm.convert(
                model, self.column_names, 'target', 'probabilities')

            if macos_version() >= (10, 13):
                metrics = evaluate_classifier_with_probabilities(
                    spec, df, verbose=False)
                self.assertEqual(metrics['num_key_mismatch'], 0)
                self.assertLess(metrics['max_probability_error'], 0.00001)

            if not allow_slow:
                break

        if not allow_slow:
            break
def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels):
    options = {
        'C': (0.1, 1., 2.),
        'fit_intercept': (True, False),
        'class_weight': ('balanced', None),
        'solver': ('newton-cg', 'lbfgs', 'liblinear', 'sag')
    }

    # Generate a list of all combinations of options and the default parameters
    product = itertools.product(*options.values())
    args = [{}] + [dict(zip(options.keys(), p)) for p in product]

    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ['x1', 'x2']
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in args:
        print(class_labels, cur_args)
        cur_model = LogisticRegression(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model, input_features=column_names,
                       output_feature_names='target')

        if is_macos() and macos_version() >= (10, 13):
            probability_lists = cur_model.predict_proba(x)
            df['classProbability'] = [
                dict(zip(cur_model.classes_, cur_vals))
                for cur_vals in probability_lists
            ]
            metrics = evaluate_classifier_with_probabilities(
                spec, df, probabilities='classProbability', verbose=False)
            self.assertEqual(metrics['num_key_mismatch'], 0)
            self.assertLess(metrics['max_probability_error'], 0.00001)
def _test_prob_model(self, param1, param2):
    probability_param = '-b 1'
    df = self.df

    param_str = ' '.join(
        [self.base_param, param1, param2, probability_param])
    param = svm_parameter(param_str)
    model = svm_train(self.prob, param)

    # Get predictions with probabilities as dictionaries
    (df['prediction'], _, probability_lists) = svm_predict(
        self.y, self.x, model, probability_param + ' -q')
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df['probabilities'] = probability_dicts

    spec = libsvm.convert(model, self.column_names, 'target', 'probabilities')

    if macos_version() >= (10, 13):
        metrics = evaluate_classifier_with_probabilities(
            spec, df, verbose=False)
        self.assertEqual(metrics['num_key_mismatch'], 0)
        self.assertLess(metrics['max_probability_error'], 0.00001)
def _train_convert_evaluate_assert(self, **xgboost_params):
    """
    Train an XGBoost classifier, convert it, and then evaluate the converted
    model with CoreML.
    """
    xgb_model = xgboost.XGBClassifier(**xgboost_params)
    xgb_model.fit(self.X, self.target)

    # Convert the model
    spec = xgb_converter.convert(
        xgb_model, self.feature_names, self.output_name, mode="classifier"
    )

    if _is_macos() and _macos_version() >= (10, 13):
        # Get predictions
        df = pd.DataFrame(self.X, columns=self.feature_names)
        probabilities = xgb_model.predict_proba(self.X)
        df["classProbability"] = [
            dict(zip(xgb_model.classes_, cur_vals)) for cur_vals in probabilities
        ]
        metrics = evaluate_classifier_with_probabilities(
            spec, df, probabilities="classProbability", verbose=False
        )
        self.assertEqual(metrics["num_key_mismatch"], 0)
        self.assertLess(metrics["max_probability_error"], 1e-3)
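# A hedged usage sketch for _train_convert_evaluate_assert: the hyperparameter
# values below are illustrative assumptions, not the grid from the original
# suite; any standard xgboost.XGBClassifier keyword arguments would work.
def test_binary_classifier_small(self):
    self._train_convert_evaluate_assert(max_depth=3, n_estimators=10)

def test_binary_classifier_regularized(self):
    self._train_convert_evaluate_assert(max_depth=5, n_estimators=50, reg_alpha=0.1)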
def _evaluation_test_helper(self, class_labels, use_probability_estimates,
                            allow_slow, allowed_prob_delta=0.00001):
    # Parameters to test
    kernel_parameters = [
        {},
        {'kernel': 'rbf', 'gamma': 1.2},
        {'kernel': 'linear'},
        {'kernel': 'poly'},
        {'kernel': 'poly', 'degree': 2},
        {'kernel': 'poly', 'gamma': 0.75},
        {'kernel': 'poly', 'degree': 0, 'gamma': 0.9, 'coef0': 2},
        {'kernel': 'sigmoid'},
        {'kernel': 'sigmoid', 'gamma': 1.3},
        {'kernel': 'sigmoid', 'coef0': 0.8},
        {'kernel': 'sigmoid', 'coef0': 0.8, 'gamma': 0.5}
    ]
    non_kernel_parameters = [
        {},
        {'C': 1},
        {'C': 1.5, 'shrinking': True},
        {'C': 0.5, 'shrinking': False}
    ]

    # Generate some random data
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append([random.gauss(200, 30), random.gauss(-100, 22),
                  random.gauss(100, 42)])
        y.append(random.choice(class_labels))
    column_names = ['x1', 'x2', 'x3']
    # Make sure the first label is seen first, the second second, and so on.
    for i, val in enumerate(class_labels):
        y[i] = val
    df = pd.DataFrame(x, columns=column_names)

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)
            cur_params['probability'] = use_probability_estimates
            cur_params['max_iter'] = 10  # Don't want the test to take too long
            print("cur_params=" + str(cur_params))

            cur_model = SVC(**cur_params)
            cur_model.fit(x, y)

            spec = scikit_converter.convert(cur_model, column_names, 'target')

            if macos_version() >= (10, 13):
                if use_probability_estimates:
                    probability_lists = cur_model.predict_proba(x)
                    df['classProbability'] = [
                        dict(zip(cur_model.classes_, cur_vals))
                        for cur_vals in probability_lists
                    ]
                    metrics = evaluate_classifier_with_probabilities(
                        spec, df, probabilities='classProbability', verbose=True)
                    self.assertEqual(metrics['num_key_mismatch'], 0)
                    self.assertLess(metrics['max_probability_error'],
                                    allowed_prob_delta)
                else:
                    df['prediction'] = cur_model.predict(x)
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEqual(metrics['num_errors'], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
def _test_prob_model(self, param1, param2):
    probability_param = "-b 1"
    df = self.df

    param_str = " ".join([self.base_param, param1, param2, probability_param])
    param = svmutil.svm_parameter(param_str)
    model = svm_train(self.prob, param)

    # Get predictions with probabilities as dictionaries
    (df["prediction"], _, probability_lists) = svm_predict(
        self.y, self.x, model, probability_param + " -q"
    )
    probability_dicts = [
        dict(zip([1, 2], cur_vals)) for cur_vals in probability_lists
    ]
    df["probabilities"] = probability_dicts

    spec = libsvm.convert(model, self.column_names, "target", "probabilities")

    if _is_macos() and _macos_version() >= (10, 13):
        metrics = evaluate_classifier_with_probabilities(spec, df, verbose=False)
        self.assertEqual(metrics["num_key_mismatch"], 0)
        self.assertLess(metrics["max_probability_error"], 0.00001)
def _conversion_and_evaluation_helper_for_logistic_regression(self, class_labels):
    options = {
        "C": (0.1, 1.0, 2.0),
        "fit_intercept": (True, False),
        "class_weight": ("balanced", None),
        "solver": ("newton-cg", "lbfgs", "liblinear", "sag"),
    }

    # Generate a list of all combinations of options and the default parameters
    product = itertools.product(*options.values())
    args = [{}] + [dict(zip(options.keys(), p)) for p in product]

    x, y = GlmCassifierTest._generate_random_data(class_labels)
    column_names = ["x1", "x2"]
    df = pd.DataFrame(x, columns=column_names)

    for cur_args in args:
        print(class_labels, cur_args)
        cur_model = LogisticRegression(**cur_args)
        cur_model.fit(x, y)

        spec = convert(cur_model, input_features=column_names,
                       output_feature_names="target")

        if _is_macos() and _macos_version() >= (10, 13):
            probability_lists = cur_model.predict_proba(x)
            df["classProbability"] = [
                dict(zip(cur_model.classes_, cur_vals))
                for cur_vals in probability_lists
            ]
            metrics = evaluate_classifier_with_probabilities(
                spec, df, probabilities="classProbability", verbose=False
            )
            self.assertEqual(metrics["num_key_mismatch"], 0)
            self.assertLess(metrics["max_probability_error"], 0.00001)
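# _generate_random_data is referenced above but not shown. A plausible sketch,
# assuming a fixed-seed two-feature Gaussian sample with every label forced to
# appear at least once (the real GlmCassifierTest helper may differ):
@staticmethod
def _generate_random_data(labels):
    import random
    random.seed(42)
    # Generate some random data
    x, y = [], []
    for _ in range(100):
        x.append([random.gauss(2, 3), random.gauss(-3.2, 5)])
        y.append(random.choice(labels))
    # Make sure each label is seen at least once, in label order
    for i, val in enumerate(labels):
        y[i] = val
    return x, y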
def _evaluation_test_helper(
    self,
    class_labels,
    use_probability_estimates,
    allow_slow,
    allowed_prob_delta=0.00001,
):
    # Parameters to test
    kernel_parameters = [
        {},
        {"kernel": "rbf", "gamma": 1.2},
        {"kernel": "linear"},
        {"kernel": "poly"},
        {"kernel": "poly", "degree": 2},
        {"kernel": "poly", "gamma": 0.75},
        {"kernel": "poly", "degree": 0, "gamma": 0.9, "coef0": 2},
        {"kernel": "sigmoid"},
        {"kernel": "sigmoid", "gamma": 1.3},
        {"kernel": "sigmoid", "coef0": 0.8},
        {"kernel": "sigmoid", "coef0": 0.8, "gamma": 0.5},
    ]
    non_kernel_parameters = [
        {},
        {"C": 1},
        {"C": 1.5, "shrinking": True},
        {"C": 0.5, "shrinking": False},
    ]

    # Generate some random data
    x, y = [], []
    random.seed(42)
    for _ in range(50):
        x.append(
            [random.gauss(200, 30), random.gauss(-100, 22), random.gauss(100, 42)]
        )
        y.append(random.choice(class_labels))
    column_names = ["x1", "x2", "x3"]
    # Make sure the first label is seen first, the second second, and so on.
    for i, val in enumerate(class_labels):
        y[i] = val
    df = pd.DataFrame(x, columns=column_names)

    # Test
    for param1 in non_kernel_parameters:
        for param2 in kernel_parameters:
            cur_params = param1.copy()
            cur_params.update(param2)
            cur_params["probability"] = use_probability_estimates
            cur_params["max_iter"] = 10  # Don't want the test to take too long
            print("cur_params=" + str(cur_params))

            cur_model = SVC(**cur_params)
            cur_model.fit(x, y)

            spec = scikit_converter.convert(cur_model, column_names, "target")

            if _is_macos() and _macos_version() >= (10, 13):
                if use_probability_estimates:
                    probability_lists = cur_model.predict_proba(x)
                    df["classProbability"] = [
                        dict(zip(cur_model.classes_, cur_vals))
                        for cur_vals in probability_lists
                    ]
                    metrics = evaluate_classifier_with_probabilities(
                        spec, df, probabilities="classProbability", verbose=True
                    )
                    self.assertEqual(metrics["num_key_mismatch"], 0)
                    self.assertLess(
                        metrics["max_probability_error"], allowed_prob_delta
                    )
                else:
                    df["prediction"] = cur_model.predict(x)
                    metrics = evaluate_classifier(spec, df, verbose=False)
                    self.assertEqual(metrics["num_errors"], 0)

            if not allow_slow:
                break

        if not allow_slow:
            break
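# Hedged examples of how a test case might drive _evaluation_test_helper; the
# label sets and allow_slow settings are illustrative assumptions, not the
# original suite's cases.
def test_binary_string_labels_no_probability(self):
    self._evaluation_test_helper(["A", "B"], False, allow_slow=False)

def test_binary_int_labels_with_probability(self):
    self._evaluation_test_helper([1, 2], True, allow_slow=False)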