def test_no_optional_args_export(self):
  """Checks export with an export_fn that takes only the two required args."""
  expected_path = '/path/to/model'

  def _plain_export_fn(estimator, export_path):
    # The strategy is invoked below with (None, None); verify they arrive.
    self.assertTupleEqual((estimator, export_path), (None, None))
    return expected_path

  strategy = export_strategy.ExportStrategy('foo', _plain_export_fn)
  # The strategy compares equal to a plain tuple whose third field is None.
  self.assertTupleEqual(strategy, ('foo', _plain_export_fn, None))

  exported = strategy.export(None, None)
  self.assertIs(exported, expected_path)
def test_eval_only_export(self):
  """Checks that an export_fn taking eval_result alone is rejected on export."""

  def _eval_only_fn(estimator, export_path, eval_result):
    # Never expected to do real work; the arguments are simply discarded.
    del estimator, export_path, eval_result

  expected_error = ('An export_fn accepting '
                    'eval_result must also accept '
                    'checkpoint_path')

  strategy = export_strategy.ExportStrategy('foo', _eval_only_fn)
  self.assertTupleEqual(strategy, ('foo', _eval_only_fn, None))

  # export_fn accepts eval_result but not checkpoint_path, so export() must
  # raise rather than call it.
  with self.assertRaisesRegexp(ValueError, expected_error):
    strategy.export(None, None, eval_result='eval')
def test_checkpoint_export(self):
  """Checks that checkpoint_path is forwarded to an export_fn accepting it."""
  expected_path = '/path/to/checkpoint_model'

  def _with_checkpoint_fn(estimator, export_path, checkpoint_path):
    # Required arguments arrive untouched ...
    self.assertTupleEqual((estimator, export_path), (None, None))
    # ... and the checkpoint given to export() is passed through.
    self.assertEqual(checkpoint_path, 'checkpoint')
    return expected_path

  strategy = export_strategy.ExportStrategy('foo', _with_checkpoint_fn)
  self.assertTupleEqual(strategy, ('foo', _with_checkpoint_fn, None))

  exported = strategy.export(None, None, 'checkpoint')
  self.assertIs(exported, expected_path)
def test_strip_default_attr_export(self):
  """Checks that the strategy's strip_default_attrs value reaches export_fn."""
  expected_path = '/path/to/strip_default_attrs_model'

  def _stripping_export_fn(estimator, export_path, strip_default_attrs):
    self.assertTupleEqual((estimator, export_path), (None, None))
    # The strategy below is built with True as its third field.
    self.assertTrue(strip_default_attrs)
    return expected_path

  strategy = export_strategy.ExportStrategy('foo', _stripping_export_fn, True)
  self.assertTupleEqual(strategy, ('foo', _stripping_export_fn, True))

  exported = strategy.export(None, None)
  self.assertIs(exported, expected_path)
def make_custom_export_strategy(name,
                                convert_fn,
                                feature_columns,
                                export_input_fn,
                                use_core_columns=False,
                                feature_engineering_fn=None,
                                default_output_alternative_key=None):
  """Builds an export strategy that post-processes a GTFlow SavedModel.

  The returned strategy first exports a SavedModel through the standard
  export strategy, then reloads it, reads the serialized tree ensemble,
  optionally hands it to `convert_fn`, and finally writes a
  `feature_importances` file under the SavedModel's `assets.extra` directory.

  Args:
    name: A string, for the name of the export strategy.
    convert_fn: A function that converts the tree proto to the desired format
      and saves it to the desired location. Can be None to skip conversion.
    feature_columns: A list of feature columns.
    export_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.
    use_core_columns: A boolean, whether core feature columns were used.
    feature_engineering_fn: Feature eng function to be called on the input.
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Not needed for single-headed models.

  Returns:
    An `ExportStrategy`.
  """
  base_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn=export_input_fn,
      strip_default_attrs=True,
      default_output_alternative_key=default_output_alternative_key)

  # Resolve the serving features once, up front, so that the feature layout
  # is available to every later export call.
  features = export_input_fn().features
  if feature_engineering_fn is not None:
    features, _ = feature_engineering_fn(features, labels=None)
  (sorted_feature_names, dense_floats, sparse_float_indices, _, _,
   sparse_int_indices, _, _) = gbdt_batch.extract_features(
       features, feature_columns, use_core_columns)

  def export_fn(estimator, export_dir, checkpoint_path=None, eval_result=None):
    """Exports a SavedModel, then converts it and records importances.

    Args:
      estimator: the Estimator to export.
      export_dir: directory handed to the base export strategy.
      checkpoint_path: optional checkpoint forwarded to the base strategy.
      eval_result: optional eval result forwarded to the base strategy and to
        `convert_fn`.

    Returns:
      The directory returned by the base export strategy.
    """
    result_dir = base_strategy.export(estimator, export_dir, checkpoint_path,
                                      eval_result)
    with ops.Graph().as_default() as graph, tf_session.Session(
        graph=graph) as sess:
      saved_model_loader.load(sess, [tag_constants.SERVING], result_dir)
      # Note: this relies on a GTFlow-internal op name and might change.
      serialize_op = graph.get_operation_by_name(
          "ensemble_model/TreeEnsembleSerialize")
      _, serialized_ensemble = sess.run(serialize_op.outputs)
      dtec = tree_config_pb2.DecisionTreeEnsembleConfig()
      dtec.ParseFromString(serialized_ensemble)

      num_dense = len(dense_floats)
      num_sparse_float = len(sparse_float_indices)
      num_sparse_int = len(sparse_int_indices)

      # The converted result goes in the same folder as the saved model.
      if convert_fn:
        convert_fn(dtec, sorted_feature_names, num_dense, num_sparse_float,
                   num_sparse_int, result_dir, eval_result)

      importances = _get_feature_importances(
          dtec, sorted_feature_names, num_dense, num_sparse_float,
          num_sparse_int)
      # Highest importance first.
      ranked = sorted(importances.items(), key=lambda item: -item[1])

      assets_dir = os.path.join(
          compat.as_bytes(result_dir), compat.as_bytes("assets.extra"))
      gfile.MakeDirs(assets_dir)
      importances_path = os.path.join(
          compat.as_bytes(assets_dir), compat.as_bytes("feature_importances"))
      report_lines = ["%s, %f" % (feature, score) for feature, score in ranked]
      with gfile.GFile(importances_path, "w") as f:
        f.write("\n".join(report_lines))
    return result_dir

  return export_strategy.ExportStrategy(
      name, export_fn, strip_default_attrs=True)
def extend_export_strategy(base_export_strategy,
                           post_export_fn,
                           post_export_name=None):
  """Wraps an ExportStrategy so that post_export_fn runs after every export.

  Args:
    base_export_strategy: An ExportStrategy that can be passed to the
      Experiment constructor.
    post_export_fn: A user-specified function to call after exporting the
      SavedModel. Takes two arguments - the path to the SavedModel exported by
      base_export_strategy and the directory where to export the SavedModel
      modified by the post_export_fn. Returns the path to the exported
      SavedModel.
    post_export_name: The directory name under the export base directory where
      SavedModels generated by the post_export_fn will be written. If None (or
      otherwise falsy), the name of base_export_strategy is used.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """

  def export_fn(estimator, export_dir_base, checkpoint_path=None):
    """Runs the base export, then post_export_fn, and publishes the result.

    Both steps write into timestamped temporary directories under
    `export_dir_base`. The post-export result is then renamed to its final
    location directly under `export_dir_base` and the temporary directories
    are deleted.

    Args:
      estimator: the Estimator to export.
      export_dir_base: A string containing a directory to write the exported
        graphs and checkpoint.
      checkpoint_path: The checkpoint path to export; forwarded unchanged to
        base_export_strategy (None by default).

    Returns:
      The string path to the SavedModel indicated by post_export_fn, relocated
      under `export_dir_base`.

    Raises:
      ValueError: If the path returned by post_export_fn does not start with
        the temporary post-export directory it was given.
      RuntimeError: If a temporary directory or the final export directory
        already exists.
    """

    def _claim_temp_dir(prefix, failure_message):
      """Creates a not-yet-existing timestamped dir under export_dir_base."""
      candidate = os.path.join(export_dir_base,
                               prefix + str(int(time.time())))
      if gfile.Exists(candidate):
        raise RuntimeError(failure_message)
      gfile.MakeDirs(candidate)
      return candidate

    # Stage 1: plain export into its own scratch directory.
    base_scratch_dir = _claim_temp_dir(
        'temp-base-export-', 'Failed to obtain base export directory')
    base_result = base_export_strategy.export(estimator, base_scratch_dir,
                                              checkpoint_path)

    # Stage 2: let the user-supplied hook produce the modified export.
    post_scratch_dir = _claim_temp_dir(
        'temp-post-export-', 'Failed to obtain temp export directory')
    post_result = post_export_fn(base_result, post_scratch_dir)
    if not post_result.startswith(post_scratch_dir):
      raise ValueError(
          'post_export_fn must return a sub-directory of {}'.format(
              post_scratch_dir))

    # Stage 3: move the result to the same relative location under the real
    # export base, refusing to overwrite anything already there.
    relative_result = os.path.relpath(post_result, post_scratch_dir)
    final_path = os.path.join(export_dir_base, relative_result)
    if gfile.Exists(final_path):
      raise RuntimeError('Failed to obtain final export directory')
    gfile.Rename(post_result, final_path)

    gfile.DeleteRecursively(base_scratch_dir)
    gfile.DeleteRecursively(post_scratch_dir)
    return final_path

  name = post_export_name or base_export_strategy.name
  return export_strategy.ExportStrategy(name, export_fn)
def make_best_model_export_strategy(serving_input_fn,
                                    exports_to_keep=1,
                                    model_dir=None,
                                    event_file_pattern=None,
                                    compare_fn=None,
                                    default_output_alternative_key=None,
                                    strip_default_attrs=None):
  """Creates a custom ExportStrategy for use with tf.contrib.learn.Experiment.

  Args:
    serving_input_fn: a function that takes no arguments and returns an
      `InputFnOps`.
    exports_to_keep: an integer indicating how many historical best models need
      to be preserved.
    model_dir: Directory where model parameters, graph etc. are saved. This will
      be used to load eval metrics from the directory when the export strategy
      is created. So the best metrics would not be lost even if the export
      strategy got preempted, which guarantees that only the best model would
      be exported regardless of preemption. If None, however, the export
      strategy would not be preemption-safe. To be preemption-safe, both
      model_dir and event_file_pattern would be needed.
    event_file_pattern: event file name pattern relative to model_dir, e.g.
      "eval_continuous/*.tfevents.*". If None, however, the export strategy
      would not be preemption-safe. To be preemption-safe, both model_dir and
      event_file_pattern would be needed.
    compare_fn: a function that selects the 'best' candidate from a dictionary
      of evaluation results keyed by corresponding checkpoint path.
    default_output_alternative_key: the key for default serving signature for
      multi-headed inference graphs.
    strip_default_attrs: Boolean. If True, default attrs in the `GraphDef` will
      be stripped on write. This is recommended for better forward
      compatibility of the resulting `SavedModel`.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """
  best_model_export_strategy = make_export_strategy(
      serving_input_fn,
      exports_to_keep=exports_to_keep,
      default_output_alternative_key=default_output_alternative_key,
      strip_default_attrs=strip_default_attrs)

  # The selector only gets an event-file pattern when both pieces are given;
  # otherwise it is constructed with None.
  if model_dir and event_file_pattern:
    full_event_file_pattern = os.path.join(model_dir, event_file_pattern)
  else:
    full_event_file_pattern = None
  best_model_selector = BestModelSelector(full_event_file_pattern, compare_fn)

  def export_fn(estimator, export_dir_base, checkpoint_path=None,
                eval_result=None):
    """Exports the given Estimator as a SavedModel if the selector picks one.

    Args:
      estimator: the Estimator to export.
      export_dir_base: A string containing a directory to write the exported
        graph and checkpoints.
      checkpoint_path: The checkpoint path to export. If None or empty (the
        default), the most recent checkpoint found within the model directory
        is chosen.
      eval_result: The evaluation result for `checkpoint_path`; passed to the
        best-model selector together with the checkpoint path.

    Returns:
      The string path to the exported directory, or an empty string when the
      selector does not return both a checkpoint and an eval result to export.
    """
    if not checkpoint_path:
      # TODO(b/67425018): switch to
      #    checkpoint_path = estimator.latest_checkpoint()
      #  as soon as contrib is cleaned up and we can thus be sure that
      #  estimator is a tf.estimator.Estimator and not a
      #  tf.contrib.learn.Estimator
      checkpoint_path = checkpoint_management.latest_checkpoint(
          estimator.model_dir)

    export_checkpoint_path, export_eval_result = best_model_selector.update(
        checkpoint_path, eval_result)
    if not export_checkpoint_path or export_eval_result is None:
      return ''

    # Each selected checkpoint is exported under a sub-directory named after
    # the checkpoint file.
    checkpoint_base = os.path.basename(export_checkpoint_path)
    export_dir = os.path.join(export_dir_base, checkpoint_base)
    return best_model_export_strategy.export(
        estimator, export_dir, export_checkpoint_path, export_eval_result)

  return export_strategy.ExportStrategy('best_model', export_fn)
def make_export_strategy(serving_input_fn,
                         default_output_alternative_key=None,
                         assets_extra=None,
                         as_text=False,
                         exports_to_keep=5,
                         strip_default_attrs=None):
  """Builds the standard SavedModel ExportStrategy for use with Experiment.

  Args:
    serving_input_fn: A function that takes no arguments and returns an
      `InputFnOps`.
    default_output_alternative_key: the name of the head to serve when an
      incoming serving request does not explicitly request a specific head.
      Must be `None` if the estimator inherits from `tf.estimator.Estimator`
      or for single-headed models.
    assets_extra: A dict specifying how to populate the assets.extra directory
      within the exported SavedModel.  Each key should give the destination
      path (including the filename) relative to the assets.extra directory.
      The corresponding value gives the full path of the source file to be
      copied.  For example, the simple case of copying a single file without
      renaming it is specified as
      `{'my_asset_file.txt': '/path/to/my_asset_file.txt'}`.
    as_text: whether to write the SavedModel proto in text format.
    exports_to_keep: Number of exports to keep.  Older exports will be
      garbage-collected.  Defaults to 5.  Set to None to disable garbage
      collection.
    strip_default_attrs: Boolean. If True, default attrs in the `GraphDef` will
      be stripped on write. This is recommended for better forward
      compatibility of the resulting `SavedModel`.

  Returns:
    An ExportStrategy that can be passed to the Experiment constructor.
  """

  def export_fn(estimator, export_dir_base, checkpoint_path=None,
                strip_default_attrs=False):
    """Writes the Estimator out as a SavedModel and prunes old exports.

    Args:
      estimator: the Estimator to export.
      export_dir_base: A string containing a directory to write the exported
        graph and checkpoints.
      checkpoint_path: The checkpoint path to export.  If None (the default),
        the most recent checkpoint found within the model directory is chosen.
      strip_default_attrs: Boolean. If `True`, default-valued attributes will
        be removed from the NodeDefs.

    Returns:
      The string path to the exported directory.

    Raises:
      ValueError: If `estimator` is a `tf.estimator.Estimator` instance
        and `default_output_alternative_key` was specified.
    """
    # Keyword arguments shared by both estimator flavours.
    savedmodel_kwargs = {
        'assets_extra': assets_extra,
        'as_text': as_text,
        'checkpoint_path': checkpoint_path,
        'strip_default_attrs': strip_default_attrs,
    }

    if not isinstance(estimator, core_estimator.Estimator):
      # Only non-core estimators are given the alternative key.
      savedmodel_kwargs['default_output_alternative_key'] = (
          default_output_alternative_key)
    elif default_output_alternative_key is not None:
      raise ValueError(
          'default_output_alternative_key is not supported in core '
          'Estimator. Given: {}'.format(default_output_alternative_key))

    export_result = estimator.export_savedmodel(
        export_dir_base, serving_input_fn, **savedmodel_kwargs)

    garbage_collect_exports(export_dir_base, exports_to_keep)
    return export_result

  return export_strategy.ExportStrategy('Servo', export_fn, strip_default_attrs)