def test_lightgbm_classifier(self):
    """Convert a fitted LightGBM classifier to PMML and predict with it.

    Fits a small multiclass ``LGBMClassifier`` on the fixture data, saves
    its booster with ``save_model``, converts the saved file with
    ``ppmml.to_pmml`` and then runs ``ppmml.predict`` on the fixture test
    input. Each stage is verified only by asserting that the file it is
    expected to produce exists.
    """
    classifier_params = {
        'boosting_type': 'gbdt',
        'num_leaves': 31,
        'max_depth': 6,
        'learning_rate': 0.1,
        'n_estimators': 10,
        'objective': 'multiclass',
    }
    classifier = lgb.LGBMClassifier(**classifier_params)
    classifier.fit(
        self.data.X,
        self.data.y,
        eval_metric='logloss',
        feature_name=self.data.features,
    )

    # Stage 1: save the fitted booster to disk.
    model_file = os.path.join(
        self.data.base_path, "lightgbm_classifier.txt")
    classifier.booster_.save_model(model_file)
    self.assertTrue(os.path.exists(model_file))

    # Stage 2: convert the saved model to PMML.
    pmml_file = os.path.join(
        self.data.base_path, "lightgbm_classifier.pmml")
    conversion_options = {
        'compact': True,
        'target-name': 'y',
    }
    ppmml.to_pmml(
        model_input=model_file,
        pmml_output=pmml_file,
        model_type='lightgbm',
        options=conversion_options,
    )
    self.assertTrue(os.path.exists(pmml_file))

    # Stage 3: validate the PMML file by predicting on the test input.
    prediction_file = os.path.join(
        self.data.base_path, "lightgbm_classifier.csv")
    ppmml.predict(pmml_file, self.data.test_data_input, prediction_file)
    self.assertTrue(os.path.exists(prediction_file))
def test_linear_classifier(self):
    """Convert a TensorFlow ``LinearClassifier`` to PMML and predict with it.

    Builds a 3-class ``LinearClassifier``, passes it together with the
    model output path to ``self._generate_tf_model`` (which is expected to
    write the model to that path), converts the result with
    ``ppmml.to_pmml`` and runs ``ppmml.predict`` on the fixture test input.
    Each stage is verified only by asserting that its output path exists.
    """
    # NOTE: the "classifer" spelling is kept as-is; this string determines
    # the names of the artifacts written under the base path.
    name = "linear_classifer"

    model_path = os.path.join(self.data.base_path, "{}".format(name))
    estimator = LinearClassifier(
        feature_columns=self._iris_dnn_features(),
        n_classes=3,
        optimizer=tf.train.AdamOptimizer,
        config=self.estimator_conf,
    )
    self._generate_tf_model(estimator, model_path)
    self.assertTrue(os.path.exists(model_path))

    # Convert the generated model to PMML.
    pmml_path = os.path.join(self.data.base_path, "{}.pmml".format(name))
    ppmml.to_pmml(
        model_input=model_path,
        pmml_output=pmml_path,
        model_type='tensorflow',
    )
    self.assertTrue(os.path.exists(pmml_path))

    # Validate the PMML file by predicting on the test input.
    prediction_path = os.path.join(
        self.data.base_path, "{}.csv".format(name))
    ppmml.predict(pmml_path, self.data.test_data_input, prediction_path)
    self.assertTrue(os.path.exists(prediction_path))
def test_r_converter(self):
    """Convert the R model at ``self.r_model_path`` to PMML.

    The conversion is considered successful when ``r_model.pmml`` exists
    under ``self.base_path`` afterwards.
    """
    pmml_file = os.path.join(self.base_path, "r_model.pmml")
    conversion_kwargs = {
        'model_input': self.r_model_path,
        'pmml_output': pmml_file,
        'model_type': 'r',
    }
    ppmml.to_pmml(**conversion_kwargs)

    pmml_was_written = os.path.exists(pmml_file)
    self.assertTrue(pmml_was_written)
def _to_pmml_and_validate(self, algo_name):
    """Convert a Spark model to PMML, then predict on the test data.

    ``<algo_name>_model`` and ``<algo_name>.json`` under ``self.base_path``
    are passed to ``ppmml.to_pmml`` as the model input and the
    ``schema_input``; the PMML is written to ``<algo_name>.pmml``. If the
    file at ``self.test_data_path`` does not exist yet it is generated
    first, and the predictions go to ``<algo_name>.csv``.

    Args:
        algo_name: string, the algorithm name used as the file-name stem
    """
    def in_base_path(template):
        # Fill the algorithm name into the template and anchor the
        # result under the base path.
        return os.path.join(self.base_path, template.format(algo_name))

    model_path = in_base_path("{}_model")
    schema_path = in_base_path("{}.json")
    pmml_path = in_base_path("{}.pmml")

    ppmml.to_pmml(
        model_path,
        pmml_path,
        schema_input=schema_path,
        model_type="spark",
    )
    self.assertTrue(os.path.exists(pmml_path))

    # Validate the PMML file; create the test data on first use.
    test_data_missing = not os.path.exists(self.test_data_path)
    if test_data_missing:
        logging.info("generate test data: {}".format(self.test_data_path))
        self._generate_test_data(self.test_data_path)

    prediction_path = in_base_path("{}.csv")
    ppmml.predict(pmml_path, self.test_data_path, prediction_path)
    self.assertTrue(os.path.exists(prediction_path))
def _to_pmml_and_validate(self, estimator, algo_name):
    """Dump an estimator, convert the dump to PMML, then predict with it.

    The estimator is written with ``joblib.dump`` (``compress=9``) to
    ``<algo_name>.pkl.z`` under ``self.data.base_path``, converted with
    ``ppmml.to_pmml`` into ``<algo_name>.pmml``, and the PMML is then used
    by ``ppmml.predict`` on the fixture test input, writing
    ``<algo_name>.csv``.

    Args:
        estimator: sklearn estimator
        algo_name: string, the algorithm name used as the file-name stem
    """
    def in_base_path(template):
        # Fill the algorithm name into the template and anchor the
        # result under the fixture's base path.
        return os.path.join(
            self.data.base_path, template.format(algo_name))

    pickle_path = in_base_path("{}.pkl.z")
    pmml_path = in_base_path("{}.pmml")

    joblib.dump(estimator, pickle_path, compress=9)
    ppmml.to_pmml(pickle_path, pmml_path, model_type="sklearn")
    self.assertTrue(os.path.exists(pmml_path))

    # Validate the PMML file by predicting on the test input.
    prediction_path = in_base_path("{}.csv")
    ppmml.predict(pmml_path, self.data.test_data_input, prediction_path)
    self.assertTrue(os.path.exists(prediction_path))
def test_exceptions(self):
    """Check that converting a non-existent model path raises ValueError."""
    missing_model = "non_existent_path"
    pmml_target = "./pmml_output.pmml"
    # Callable form of assertRaises: invokes ppmml.to_pmml with the given
    # arguments and fails unless ValueError is raised.
    self.assertRaises(ValueError, ppmml.to_pmml, missing_model, pmml_target)