def make_custom_export_strategy(name, convert_fn, feature_columns,
                                export_input_fn):
  """Makes custom exporter of GTFlow tree format.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to desired format and
      saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn)
  serving_input = export_input_fn()
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       serving_input.features, feature_columns)
  # Feature counts are fixed at strategy-creation time; hoist them out of the
  # per-export closure.
  num_dense = len(dense_floats)
  num_sparse_float = len(sparse_float_indices)
  num_sparse_int = len(sparse_int_indices)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """A wrapper to export to SavedModel, and convert it to other formats."""
    result_dir = base_strategy.export(estimator, export_dir, checkpoint_path,
                                      eval_result)
    with ops.Graph().as_default() as graph:
      with tf_session.Session(graph=graph) as sess:
        saved_model_loader.load(sess, [tag_constants.SERVING], result_dir)
        # Note: This is GTFlow internal API and might change.
        ensemble_model = graph.get_operation_by_name(
            "ensemble_model/TreeEnsembleSerialize")
        _, serialized_ensemble = sess.run(ensemble_model.outputs)
        ensemble_proto = tree_config_pb2.DecisionTreeEnsembleConfig()
        ensemble_proto.ParseFromString(serialized_ensemble)
        # Export the result in the same folder as the saved model.
        if convert_fn:
          convert_fn(ensemble_proto, sorted_feature_names, num_dense,
                     num_sparse_float, num_sparse_int, result_dir, eval_result)
        # Also write feature importances, most important first, as a
        # SavedModel extra asset.
        feature_importances = _get_feature_importances(
            ensemble_proto, sorted_feature_names, num_dense, num_sparse_float,
            num_sparse_int)
        sorted_by_importance = sorted(
            feature_importances.items(), key=lambda x: -x[1])
        assets_dir = os.path.join(result_dir, "assets.extra")
        gfile.MakeDirs(assets_dir)
        with gfile.GFile(os.path.join(assets_dir, "feature_importances"),
                         "w") as f:
          f.write("\n".join(
              "%s, %f" % (k, v) for k, v in sorted_by_importance))
    return result_dir

  return export_strategy.ExportStrategy(name, export_fn)
def make_custom_export_strategy(name, convert_fn, feature_columns,
                                export_input_fn):
  """Makes custom exporter of GTFlow tree format.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to desired format and
      saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn)
  input_fn = export_input_fn()
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       input_fn.features, feature_columns)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """A wrapper to export to SavedModel, and convert it to other formats."""
    result_dir = base_strategy.export(estimator, export_dir, checkpoint_path,
                                      eval_result)
    with ops.Graph().as_default() as graph, tf_session.Session(
        graph=graph) as sess:
      saved_model_loader.load(sess, [tag_constants.SERVING], result_dir)
      # Note: This is GTFlow internal API and might change.
      ensemble_model = graph.get_operation_by_name(
          "ensemble_model/TreeEnsembleSerialize")
      _, dfec_str = sess.run(ensemble_model.outputs)
      dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
      dtec.ParseFromString(dfec_str)
      # Export the result in the same folder as the saved model.
      if convert_fn:
        convert_fn(dtec, sorted_feature_names,
                   len(dense_floats),
                   len(sparse_float_indices),
                   len(sparse_int_indices), result_dir, eval_result)
      # Emit feature importances, highest first, as an extra asset next to
      # the SavedModel.
      feature_importances = _get_feature_importances(
          dtec, sorted_feature_names,
          len(dense_floats),
          len(sparse_float_indices), len(sparse_int_indices))
      sorted_by_importance = sorted(
          feature_importances.items(), key=lambda kv: kv[1], reverse=True)
      assets_dir = os.path.join(result_dir, "assets.extra")
      gfile.MakeDirs(assets_dir)
      importances_path = os.path.join(assets_dir, "feature_importances")
      with gfile.GFile(importances_path, "w") as f:
        f.write("\n".join(
            "%s, %f" % (k, v) for k, v in sorted_by_importance))
    return result_dir

  return export_strategy.ExportStrategy(
      name, export_fn, strip_default_attrs=True)
def testExtractFeaturesWithTransformation(self):
  """Tests feature extraction when feature columns transform the input."""
  with self.test_session():
    # One dense float, one variable-length sparse float and one sparse
    # categorical (string) input, all zero-filled.
    features = {
        "dense_float":
            array_ops.zeros([2, 1], dtypes.float32),
        "sparse_float":
            sparse_tensor.SparseTensor(
                array_ops.zeros([2, 2], dtypes.int64),
                array_ops.zeros([2], dtypes.float32),
                array_ops.zeros([2], dtypes.int64)),
        "sparse_categorical":
            sparse_tensor.SparseTensor(
                array_ops.zeros([2, 2], dtypes.int64),
                array_ops.zeros([2], dtypes.string),
                array_ops.zeros([2], dtypes.int64)),
    }
    feature_columns = {
        layers.real_valued_column("dense_float"),
        layers.feature_column._real_valued_var_len_column(
            "sparse_float", is_sparse=True),
        feature_column_lib.sparse_column_with_hash_bucket(
            "sparse_categorical", hash_bucket_size=1000000),
    }
    (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
     sparse_float_shapes, sparse_int_indices, sparse_int_values,
     sparse_int_shapes) = gbdt_batch.extract_features(features,
                                                      feature_columns)
    self.assertEqual(len(fc_names), 3)
    self.assertAllEqual(fc_names,
                        ["dense_float", "sparse_float", "sparse_categorical"])
    # Exactly one extracted feature of each kind.
    for extracted in (dense_floats, sparse_float_indices, sparse_float_values,
                      sparse_float_shapes, sparse_int_indices,
                      sparse_int_values, sparse_int_shapes):
      self.assertEqual(len(extracted), 1)
    self.assertAllEqual(dense_floats[0].eval(), features["dense_float"].eval())
    self.assertAllEqual(sparse_float_indices[0].eval(),
                        features["sparse_float"].indices.eval())
    self.assertAllEqual(sparse_float_values[0].eval(),
                        features["sparse_float"].values.eval())
    self.assertAllEqual(sparse_float_shapes[0].eval(),
                        features["sparse_float"].dense_shape.eval())
    self.assertAllEqual(sparse_int_indices[0].eval(),
                        features["sparse_categorical"].indices.eval())
    # The hash-bucket column maps the string values to integer bucket ids.
    self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263])
    self.assertAllEqual(sparse_int_shapes[0].eval(),
                        features["sparse_categorical"].dense_shape.eval())
def testExtractFeaturesWithTransformation(self):
  """Tests feature extraction."""
  with self.test_session():
    features = {}
    # Dense float feature of shape [2, 1], all zeros.
    features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
    # Sparse float feature: (indices, values, dense_shape), all zeros.
    features["sparse_float"] = sparse_tensor.SparseTensor(
        array_ops.zeros([2, 2], dtypes.int64),
        array_ops.zeros([2], dtypes.float32),
        array_ops.zeros([2], dtypes.int64))
    # Sparse categorical feature with string values.
    features["sparse_categorical"] = sparse_tensor.SparseTensor(
        array_ops.zeros([2, 2], dtypes.int64), array_ops.zeros(
            [2], dtypes.string), array_ops.zeros([2], dtypes.int64))
    # One feature column per input; the categorical column hashes strings
    # into integer buckets.
    feature_columns = set()
    feature_columns.add(layers.real_valued_column("dense_float"))
    feature_columns.add(
        layers.feature_column._real_valued_var_len_column(
            "sparse_float", is_sparse=True))
    feature_columns.add(
        feature_column_lib.sparse_column_with_hash_bucket(
            "sparse_categorical", hash_bucket_size=1000000))
    (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
     sparse_float_shapes, sparse_int_indices, sparse_int_values,
     sparse_int_shapes) = (gbdt_batch.extract_features(
         features, feature_columns))
    # Expect all three features, grouped dense / sparse-float / sparse-int.
    self.assertEqual(len(fc_names), 3)
    self.assertAllEqual(fc_names,
                        ["dense_float", "sparse_float", "sparse_categorical"])
    self.assertEqual(len(dense_floats), 1)
    self.assertEqual(len(sparse_float_indices), 1)
    self.assertEqual(len(sparse_float_values), 1)
    self.assertEqual(len(sparse_float_shapes), 1)
    self.assertEqual(len(sparse_int_indices), 1)
    self.assertEqual(len(sparse_int_values), 1)
    self.assertEqual(len(sparse_int_shapes), 1)
    # Untransformed components pass through unchanged.
    self.assertAllEqual(dense_floats[0].eval(),
                        features["dense_float"].eval())
    self.assertAllEqual(sparse_float_indices[0].eval(),
                        features["sparse_float"].indices.eval())
    self.assertAllEqual(sparse_float_values[0].eval(),
                        features["sparse_float"].values.eval())
    self.assertAllEqual(sparse_float_shapes[0].eval(),
                        features["sparse_float"].dense_shape.eval())
    self.assertAllEqual(sparse_int_indices[0].eval(),
                        features["sparse_categorical"].indices.eval())
    # Hashed bucket ids for the zero-valued string inputs.
    self.assertAllEqual(sparse_int_values[0].eval(), [397263, 397263])
    self.assertAllEqual(sparse_int_shapes[0].eval(),
                        features["sparse_categorical"].dense_shape.eval())
def make_custom_export_strategy(name, convert_fn, feature_columns,
                                export_input_fn):
  """Makes custom exporter of GTFlow tree format.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to desired format and
      saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn)
  input_fn = export_input_fn()
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       input_fn.features, feature_columns)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """A wrapper to export to SavedModel, and convert it to other formats."""
    result_dir = base_strategy.export(estimator, export_dir, checkpoint_path,
                                      eval_result)
    with ops.Graph().as_default() as graph:
      with tf_session.Session(graph=graph) as sess:
        saved_model_loader.load(sess, [tag_constants.SERVING], result_dir)
        # Note: This is GTFlow internal API and might change.
        ensemble_model = graph.get_operation_by_name(
            "ensemble_model/TreeEnsembleSerialize")
        _, dfec_str = sess.run(ensemble_model.outputs)
        dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
        dtec.ParseFromString(dfec_str)
        # Export the result in the same folder as the saved model. Guard on
        # convert_fn so a None conversion simply skips this step, consistent
        # with the other make_custom_export_strategy variants in this file.
        if convert_fn:
          convert_fn(dtec, sorted_feature_names,
                     len(dense_floats),
                     len(sparse_float_indices),
                     len(sparse_int_indices), result_dir, eval_result)
    return result_dir

  return export_strategy.ExportStrategy(name, export_fn)
def testExtractFeatures(self):
  """Tests feature extraction."""
  with self.test_session():
    # One input of each supported kind (dense float, sparse float,
    # sparse int), all zero-filled.
    features = {
        "dense_float":
            array_ops.zeros([2, 1], dtypes.float32),
        "sparse_float":
            sparse_tensor.SparseTensor(
                array_ops.zeros([2, 2], dtypes.int64),
                array_ops.zeros([2], dtypes.float32),
                array_ops.zeros([2], dtypes.int64)),
        "sparse_int":
            sparse_tensor.SparseTensor(
                array_ops.zeros([2, 2], dtypes.int64),
                array_ops.zeros([2], dtypes.int64),
                array_ops.zeros([2], dtypes.int64)),
    }
    # No feature columns: features are consumed as-is.
    (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
     sparse_float_shapes, sparse_int_indices, sparse_int_values,
     sparse_int_shapes) = gbdt_batch.extract_features(features, None)
    self.assertEqual(len(fc_names), 3)
    self.assertAllEqual(fc_names,
                        ["dense_float", "sparse_float", "sparse_int"])
    # Exactly one extracted feature of each kind.
    for extracted in (dense_floats, sparse_float_indices, sparse_float_values,
                      sparse_float_shapes, sparse_int_indices,
                      sparse_int_values, sparse_int_shapes):
      self.assertEqual(len(extracted), 1)
    # Every component passes through unchanged.
    self.assertAllEqual(dense_floats[0].eval(), features["dense_float"].eval())
    self.assertAllEqual(sparse_float_indices[0].eval(),
                        features["sparse_float"].indices.eval())
    self.assertAllEqual(sparse_float_values[0].eval(),
                        features["sparse_float"].values.eval())
    self.assertAllEqual(sparse_float_shapes[0].eval(),
                        features["sparse_float"].dense_shape.eval())
    self.assertAllEqual(sparse_int_indices[0].eval(),
                        features["sparse_int"].indices.eval())
    self.assertAllEqual(sparse_int_values[0].eval(),
                        features["sparse_int"].values.eval())
    self.assertAllEqual(sparse_int_shapes[0].eval(),
                        features["sparse_int"].dense_shape.eval())
def testExtractFeatures(self):
  """Tests feature extraction."""
  with self.test_session():
    features = {}
    # Dense float feature of shape [2, 1], all zeros.
    features["dense_float"] = array_ops.zeros([2, 1], dtypes.float32)
    # Sparse float feature: (indices, values, dense_shape), all zeros.
    features["sparse_float"] = sparse_tensor.SparseTensor(
        array_ops.zeros([2, 2], dtypes.int64),
        array_ops.zeros([2], dtypes.float32),
        array_ops.zeros([2], dtypes.int64))
    # Sparse int feature with int64 values.
    features["sparse_int"] = sparse_tensor.SparseTensor(
        array_ops.zeros([2, 2], dtypes.int64),
        array_ops.zeros([2], dtypes.int64),
        array_ops.zeros([2], dtypes.int64))
    # No feature columns: features are consumed as-is.
    (fc_names, dense_floats, sparse_float_indices, sparse_float_values,
     sparse_float_shapes, sparse_int_indices, sparse_int_values,
     sparse_int_shapes) = (gbdt_batch.extract_features(features, None))
    # Expect all three features, grouped dense / sparse-float / sparse-int.
    self.assertEqual(len(fc_names), 3)
    self.assertAllEqual(fc_names,
                        ["dense_float", "sparse_float", "sparse_int"])
    self.assertEqual(len(dense_floats), 1)
    self.assertEqual(len(sparse_float_indices), 1)
    self.assertEqual(len(sparse_float_values), 1)
    self.assertEqual(len(sparse_float_shapes), 1)
    self.assertEqual(len(sparse_int_indices), 1)
    self.assertEqual(len(sparse_int_values), 1)
    self.assertEqual(len(sparse_int_shapes), 1)
    # Every component passes through unchanged.
    self.assertAllEqual(dense_floats[0].eval(),
                        features["dense_float"].eval())
    self.assertAllEqual(sparse_float_indices[0].eval(),
                        features["sparse_float"].indices.eval())
    self.assertAllEqual(sparse_float_values[0].eval(),
                        features["sparse_float"].values.eval())
    self.assertAllEqual(sparse_float_shapes[0].eval(),
                        features["sparse_float"].dense_shape.eval())
    self.assertAllEqual(sparse_int_indices[0].eval(),
                        features["sparse_int"].indices.eval())
    self.assertAllEqual(sparse_int_values[0].eval(),
                        features["sparse_int"].values.eval())
    self.assertAllEqual(sparse_int_shapes[0].eval(),
                        features["sparse_int"].dense_shape.eval())
def make_custom_export_strategy(name,
                                convert_fn,
                                feature_columns,
                                export_input_fn,
                                use_core_columns=False,
                                feature_engineering_fn=None,
                                default_output_alternative_key=None):
  """Makes custom exporter of GTFlow tree format.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to desired format and
      saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.
    use_core_columns: A boolean, whether core feature columns were used.
    feature_engineering_fn: Feature eng function to be called on the input.
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Not needed for single-headed models.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn,
      strip_default_attrs=True,
      default_output_alternative_key=default_output_alternative_key)
  input_fn = export_input_fn()
  features = input_fn.features
  # Apply the same feature engineering to the serving input so the extracted
  # feature names match what the model was trained on.
  if feature_engineering_fn is not None:
    features, _ = feature_engineering_fn(features, labels=None)
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       features, feature_columns, use_core_columns)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """A wrapper to export to SavedModel, and convert it to other formats."""
    result_dir = base_strategy.export(estimator, export_dir, checkpoint_path,
                                      eval_result)
    with ops.Graph().as_default() as graph:
      with tf_session.Session(graph=graph) as sess:
        saved_model_loader.load(sess, [tag_constants.SERVING], result_dir)
        # Note: This is GTFlow internal API and might change.
        ensemble_model = graph.get_operation_by_name(
            "ensemble_model/TreeEnsembleSerialize")
        # Deserialize the exported tree ensemble proto out of the loaded
        # SavedModel graph.
        _, dfec_str = sess.run(ensemble_model.outputs)
        dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
        dtec.ParseFromString(dfec_str)
        # Export the result in the same folder as the saved model.
        if convert_fn:
          convert_fn(dtec, sorted_feature_names,
                     len(dense_floats),
                     len(sparse_float_indices),
                     len(sparse_int_indices), result_dir, eval_result)
        # Write feature importances, most important first, as an extra asset.
        feature_importances = _get_feature_importances(
            dtec, sorted_feature_names,
            len(dense_floats),
            len(sparse_float_indices), len(sparse_int_indices))
        sorted_by_importance = sorted(
            feature_importances.items(), key=lambda x: -x[1])
        # Path components are normalized to bytes before joining
        # (result_dir may arrive as bytes rather than str).
        assets_dir = os.path.join(
            compat.as_bytes(result_dir), compat.as_bytes("assets.extra"))
        gfile.MakeDirs(assets_dir)
        with gfile.GFile(
            os.path.join(
                compat.as_bytes(assets_dir),
                compat.as_bytes("feature_importances")), "w") as f:
          f.write("\n".join("%s, %f" % (k, v)
                            for k, v in sorted_by_importance))
    return result_dir

  return export_strategy.ExportStrategy(name, export_fn,
                                        strip_default_attrs=True)