def test_merge_all(self, nest_graph):
    """Checks `merge_all` for three scoped summaries, optionally across graphs.

    Scalars "c0" and "c1" are recorded on every `_ScopedSummary` in the
    default graph. When `nest_graph` is true, "c2" is additionally recorded
    on the "scope2" summary inside a fresh graph, and only "c2" is expected
    from `merge_all` while that graph is the default.
    """
    zero = tf.constant(0)
    one = tf.constant(1)

    scoped_summaries = []
    for ctor_args in ((), ("scope1",), ("scope2",)):
        scoped = _ScopedSummary(*ctor_args)
        scoped.scalar("c0", zero)
        scoped.scalar("c1", one)
        scoped_summaries.append(scoped)
    last_scoped = scoped_summaries[-1]

    if nest_graph:
        with tf.Graph().as_default():
            last_scoped.scalar("c2", tf.constant(2))
            with tf.Session() as nested_sess:
                merged = last_scoped.merge_all()
                tf.logging.warn("summaries %s", merged)
                proto = tf.Summary()
                proto.ParseFromString(
                    nested_sess.run(tf.summary.merge(merged)))
                self.assertEqual(["c2"], [v.tag for v in proto.value])
                self.assertEqual([2],
                                 [v.simple_value for v in proto.value])

    with tf.Session() as sess:
        for scoped in scoped_summaries:
            merged = scoped.merge_all()
            proto = tf.Summary()
            proto.ParseFromString(sess.run(tf.summary.merge(merged)))
            self.assertEqual(["c0", "c1"], [v.tag for v in proto.value])
            self.assertEqual([0, 1],
                             [v.simple_value for v in proto.value])
def test_merge_all(self):
    """Checks that each scoped summary merges exactly its own two scalars."""
    zero = tf.constant(0)
    one = tf.constant(1)

    scoped_summaries = []
    for ctor_args in ((), ("scope1",), ("scope2",)):
        scoped = _ScopedSummary(*ctor_args)
        scoped.scalar("c0", zero)
        scoped.scalar("c1", one)
        scoped_summaries.append(scoped)

    # A single proto is reused; ParseFromString overwrites it each time.
    proto = tf.Summary()
    with self.test_session() as sess:
        for scoped in scoped_summaries:
            merged_op = scoped.merge_all()
            proto.ParseFromString(sess.run(merged_op))
            self.assertEqual(["c0", "c1"], [v.tag for v in proto.value])
            self.assertEqual([0, 1],
                             [v.simple_value for v in proto.value])
def test_merge_all(self, nest_graph):
    """Checks the events written by `merge_all`, optionally across graphs.

    Scalars "c0" and "c1" are recorded on three `_ScopedSummary` objects.
    When `nest_graph` is true, "c2" is also recorded on the "scope2" summary
    inside a fresh graph, and only "c2" is expected in the events read back
    while that graph is the default.
    """
    zero = tf.constant(0)
    one = tf.constant(1)

    scoped_summaries = []
    for extra_kwargs in ({}, {"scope": "scope1"}, {"scope": "scope2"}):
        scoped = _ScopedSummary(
            self.test_subdirectory, global_step=10, **extra_kwargs)
        scoped.scalar("c0", zero)
        scoped.scalar("c1", one)
        scoped_summaries.append(scoped)
    last_scoped = scoped_summaries[-1]

    if nest_graph:
        with tf.Graph().as_default():
            last_scoped.scalar("c2", tf.constant(2))
            with tf.Session() as nested_sess:
                nested_sess.run(
                    tf.contrib.summary.summary_writer_initializer_op())
                nested_sess.run(last_scoped.merge_all())
                nested_sess.run(last_scoped.flush())
                events = self.read_single_event_from_eventfile(last_scoped)
                first_values = (e.summary.value[0] for e in events)
                tag_to_value = {
                    v.tag: v.simple_value for v in first_values
                }
                self.assertEqual({"c2": 2}, tag_to_value)

    with tf.Session() as sess:
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        for scoped in scoped_summaries:
            sess.run(scoped.merge_all())
            sess.run(scoped.flush())
            events = self.read_single_event_from_eventfile(scoped)
            first_values = (e.summary.value[0] for e in events)
            tag_to_value = {v.tag: v.simple_value for v in first_values}
            self.assertEqual({"c0": 0, "c1": 1}, tag_to_value)
def test_summary_args(self):
    """Registers one summary of each kind positionally and counts them."""
    scoped = _ScopedSummary()
    scoped.scalar("scalar", 1, "family")
    scoped.image("image", 1, 3, "family")
    scoped.histogram("histogram", 1, "family")
    scoped.audio("audio", 1, 3, 3, "family")

    merged = scoped.merge_all()
    self.assertLen(merged, 4)
def test_summary_kwargs(self):
    """Registers one summary of each kind by keyword and counts them."""
    scoped = _ScopedSummary()
    scoped.scalar(name="scalar", tensor=1, family="family")
    scoped.image(name="image", tensor=1, max_outputs=3, family="family")
    scoped.histogram(name="histogram", values=1, family="family")
    scoped.audio(
        name="audio",
        tensor=1,
        sample_rate=3,
        max_outputs=3,
        family="family",
    )

    merged = scoped.merge_all()
    self.assertLen(merged, 4)
def test_histogram_summary_with_family(self, scope):
    """Checks the tag of a histogram created with a family in a name scope."""
    scoped = _ScopedSummary(scope)
    with self.test_session() as sess:
        ones = tf.ones((5, 4, 4, 3))
        with tf.name_scope("outer"):
            histogram_op = scoped.histogram("inner", ones, family="family")
        serialized = sess.run(histogram_op)

    proto = tf.Summary()
    proto.ParseFromString(serialized)
    entries = proto.value
    self.assertLen(entries, 1)
    self.assertEqual(entries[0].tag, "family/outer/family/inner")
def test_monkey_patched_summaries_args(self):
    """Calls the patched v1 and contrib summary functions positionally.

    Eight summaries are created inside `monkey_patched_summaries`; all of
    them are expected to show up in the scoped summary's `merge_all`.
    """
    scoped = _ScopedSummary(self.test_subdirectory, global_step=10)
    with monkey_patched_summaries(scoped):
        # tf.summary (v1) API.
        tf.summary.scalar("scalar", 1, ["collection"], "family")
        tf.summary.image("image", 1, 3, ["collection"], "family")
        tf.summary.histogram("histogram", 1, ["collection"], "family")
        tf.summary.audio("audio", 1, 3, 3, ["collection"], "family")
        # tf.contrib.summary API.
        tf.contrib.summary.scalar("scalar_v2", 1, "family", 10)
        tf.contrib.summary.image("image_v2", 1, True, 3, "family", 10)
        tf.contrib.summary.histogram("histogram_v2", 1, "family", 10)
        tf.contrib.summary.audio("audio_v2", 1, 3, 3, "family", 10)

    merged = scoped.merge_all()
    self.assertLen(merged, 8)
def test_histogram_summary(self, scope, skip_summary=False):
    """Checks a histogram's tag, or an empty result when summaries are skipped."""
    scoped = _ScopedSummary(scope, skip_summary)
    with self.test_session() as sess:
        ones = tf.ones((5, 4, 4, 3))
        with tf.name_scope("outer"):
            histogram_op = scoped.histogram("inner", ones)
        serialized = sess.run(histogram_op)

    if skip_summary:
        self.assertEqual("", decode(serialized))
        return

    proto = tf.Summary()
    proto.ParseFromString(serialized)
    entries = proto.value
    self.assertLen(entries, 1)
    self.assertEqual(entries[0].tag, "outer/inner")
def test_summarizing_variable(self, scope):
    """Checks that a scalar summary of a variable reports its initial value."""
    scoped = _ScopedSummary(scope)
    with self.test_session() as sess:
        variable = tf.Variable(tf.constant(42.0))
        scalar_op = scoped.scalar("summary", variable)
        sess.run(tf.global_variables_initializer())
        serialized = sess.run(scalar_op)

    proto = tf.Summary()
    proto.ParseFromString(serialized)
    self.assertLen(proto.value, 1)
    entry = proto.value[0]
    self.assertEqual(entry.tag, "summary")
    self.assertEqual(entry.simple_value, 42.0)
def test_image_summary_with_family(self, scope):
    """Checks the three image tags produced with a family in a name scope."""
    scoped = _ScopedSummary(scope)
    with self.test_session() as sess:
        images = tf.ones((5, 2, 3, 1))
        with tf.name_scope("outer"):
            image_op = scoped.image(
                "inner", images, max_outputs=3, family="family")
        serialized = sess.run(image_op)

    proto = tf.Summary()
    proto.ParseFromString(serialized)
    entries = proto.value
    self.assertLen(entries, 3)

    actual_tags = sorted(entry.tag for entry in entries)
    expected_tags = sorted(
        "family/outer/family/inner/image/{}".format(index)
        for index in range(3))
    self.assertEqual(actual_tags, expected_tags)
def test_histogram_summary_with_family(self, scope):
    """Writes a family histogram to an event file and checks its tag."""
    scoped = _ScopedSummary(
        self.test_subdirectory, scope=scope, global_step=10)
    with self.test_session() as sess:
        ones = tf.ones((5, 4, 4, 3))
        with tf.name_scope("outer"):
            scoped.histogram("inner", ones, family="family")
        # Initialize the writer, emit the summaries, then flush them to disk.
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    events = self.read_single_event_from_eventfile(scoped)
    entries = events[0].summary.value
    self.assertLen(entries, 1)
    self.assertEqual(entries[0].tag, "family/outer/family/inner")
def test_scalar_summary(self, scope, skip_summary=False):
    """Checks a scalar's tag and value, or an empty result when skipped."""
    scoped = _ScopedSummary(scope, skip_summary)
    with self.test_session() as sess:
        three = tf.constant(3)
        with tf.name_scope("outer"):
            scalar_op = scoped.scalar("inner", three)
        serialized = sess.run(scalar_op)

    if skip_summary:
        self.assertEqual("", decode(serialized))
        return

    proto = tf.Summary()
    proto.ParseFromString(serialized)
    entries = proto.value
    self.assertLen(entries, 1)
    only_entry = entries[0]
    self.assertEqual(only_entry.tag, "outer/inner")
    self.assertEqual(only_entry.simple_value, 3.0)
def test_audio_summary(self, scope, skip_summary=False):
    """Checks the three audio tags, or an empty result when skipped."""
    scoped = _ScopedSummary(scope, skip_summary)
    with self.test_session() as sess:
        clips = tf.ones((5, 3, 4))
        with tf.name_scope("outer"):
            audio_op = scoped.audio("inner", clips, 0.2, max_outputs=3)
        serialized = sess.run(audio_op)

    if skip_summary:
        self.assertEqual("", decode(serialized))
        return

    proto = tf.Summary()
    proto.ParseFromString(serialized)
    entries = proto.value
    self.assertLen(entries, 3)

    actual_tags = sorted(entry.tag for entry in entries)
    expected_tags = sorted(
        "outer/inner/audio/{}".format(index) for index in range(3))
    self.assertEqual(actual_tags, expected_tags)
def test_summary_name_conversion(self, scope):
    """Checks how summary names with unusual characters become tags."""
    scoped = _ScopedSummary(scope)
    three = tf.constant(3)
    proto = tf.Summary()

    # (name passed to `scalar`, tag expected in the serialized summary)
    cases = (
        ("name with spaces", "name_with_spaces"),
        ("name with many $#illegal^: characters!",
         "name_with_many___illegal___characters_"),
        ("/name/with/leading/slash", "name/with/leading/slash"),
    )
    with self.test_session() as sess:
        for raw_name, expected_tag in cases:
            scalar_op = scoped.scalar(raw_name, three)
            proto.ParseFromString(sess.run(scalar_op))
            self.assertEqual(proto.value[0].tag, expected_tag)
def test_summary_name_conversion(self, scope):
    """Checks the tags written to the event file for unusual summary names."""
    scoped = _ScopedSummary(
        self.test_subdirectory, scope=scope, global_step=10)
    three = tf.constant(3)
    raw_names = (
        "name with spaces",
        "name with many $#illegal^: characters!",
        "/name/with/leading/slash",
    )
    for raw_name in raw_names:
        scoped.scalar(raw_name, three)

    with self.test_session() as sess:
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    events = self.read_single_event_from_eventfile(scoped)
    self.assertLen(events, 3)
    written_tags = [event.summary.value[0].tag for event in events]
    expected_tags = (
        "name_with_spaces",
        "name_with_many___illegal___characters_",
        "name/with/leading/slash",
    )
    for expected_tag in expected_tags:
        self.assertIn(expected_tag, written_tags)
def test_monkey_patched_summaries_kwargs(self):
    """Calls the patched v1 and contrib summary functions by keyword.

    Eight summaries are created inside `monkey_patched_summaries`; all of
    them are expected to show up in the scoped summary's `merge_all`.
    """
    scoped = _ScopedSummary(self.test_subdirectory, global_step=10)
    with monkey_patched_summaries(scoped):
        # tf.summary (v1) API.
        tf.summary.scalar(
            name="scalar",
            tensor=1,
            collections=["collection"],
            family="family")
        tf.summary.image(
            name="image",
            tensor=1,
            max_outputs=3,
            collections=["collection"],
            family="family")
        tf.summary.histogram(
            name="histogram",
            values=1,
            collections=["collection"],
            family="family")
        tf.summary.audio(
            name="audio",
            tensor=1,
            sample_rate=3,
            max_outputs=3,
            collections=["collection"],
            family="family")
        # tf.contrib.summary API.
        tf.contrib.summary.scalar(
            name="scalar_v2", tensor=1, family="family", step=10)
        tf.contrib.summary.image(
            name="image_v2",
            tensor=1,
            bad_color=True,
            max_images=3,
            family="family",
            step=10)
        tf.contrib.summary.histogram(
            name="histogram_v2", tensor=1, family="family", step=10)
        tf.contrib.summary.audio(
            name="audio_v2",
            tensor=1,
            sample_rate=3,
            max_outputs=3,
            family="family",
            step=10)

    merged = scoped.merge_all()
    self.assertLen(merged, 8)
def test_current_scope(self, scope):
    """Checks the tag of a scalar recorded inside `current_scope`.

    The scalar is created under two outer variable scopes, then
    `current_scope`, then an inner variable scope; the expected tag contains
    only the inner scope and the summary name.
    """
    scoped = _ScopedSummary(
        self.test_subdirectory, scope=scope, global_step=10)
    three = tf.constant(3)
    with tf.variable_scope("outer1"), tf.variable_scope("outer2"):
        with scoped.current_scope(), tf.variable_scope("inner1"):
            scoped.scalar("inner2/a/b/c", three)

    with self.test_session() as sess:
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    events = self.read_single_event_from_eventfile(scoped)
    entries = events[0].summary.value
    self.assertLen(entries, 1)
    only_entry = entries[0]
    self.assertEqual(only_entry.tag, "inner1/inner2/a/b/c")
    self.assertEqual(only_entry.simple_value, 3.0)
def test_summarizing_variable(self, scope):
    """Writes a scalar summary of a variable and checks the recorded value."""
    scoped = _ScopedSummary(
        self.test_subdirectory, scope=scope, global_step=10)
    with self.test_session() as sess:
        variable = tf.Variable(tf.constant(42.0))
        scoped.scalar("summary", variable)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    events = self.read_single_event_from_eventfile(scoped)
    entries = events[0].summary.value
    self.assertLen(entries, 1)
    entry = entries[0]
    self.assertEqual(entry.tag, "summary")
    self.assertEqual(entry.simple_value, 42.0)
def test_scalar_summary(self, scope, skip_summary=False):
    """Writes a scalar summary and checks its tag and value in the event file.

    When `skip_summary` is true the ops are still run, but no event file is
    read back.
    """
    scoped = _ScopedSummary(
        self.test_subdirectory,
        scope=scope,
        skip_summary=skip_summary,
        global_step=10)
    with self.test_session() as sess:
        three = tf.constant(3)
        with tf.name_scope("outer"):
            scoped.scalar("inner", three)
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    if not skip_summary:
        events = self.read_single_event_from_eventfile(scoped)
        entries = events[0].summary.value
        self.assertLen(entries, 1)
        only_entry = entries[0]
        self.assertEqual(only_entry.tag, "outer/inner")
        self.assertEqual(only_entry.simple_value, 3.0)
def test_audio_summary(self, scope, skip_summary=False):
    """Writes an audio summary and checks the three tags in the event file.

    When `skip_summary` is true the ops are still run, but no event file is
    read back.
    """
    scoped = _ScopedSummary(
        self.test_subdirectory,
        scope=scope,
        skip_summary=skip_summary,
        global_step=10)
    with self.test_session() as sess:
        clips = tf.ones((5, 3, 4))
        with tf.name_scope("outer"):
            scoped.audio("inner", clips, 0.2, max_outputs=3)
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    if not skip_summary:
        events = self.read_single_event_from_eventfile(scoped)
        entries = events[0].summary.value
        self.assertLen(entries, 3)
        actual_tags = sorted(entry.tag for entry in entries)
        expected_tags = sorted(
            "outer/inner/audio/{}".format(index) for index in range(3))
        self.assertEqual(actual_tags, expected_tags)
def test_image_summary_with_family(self, scope):
    """Writes a family image summary and checks the three tags on disk."""
    scoped = _ScopedSummary(
        self.test_subdirectory, scope=scope, global_step=10)
    with self.test_session() as sess:
        images = tf.ones((5, 2, 3, 1))
        with tf.name_scope("outer"):
            scoped.image("inner", images, max_outputs=3, family="family")
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    events = self.read_single_event_from_eventfile(scoped)
    entries = events[0].summary.value
    self.assertLen(entries, 3)

    actual_tags = sorted(entry.tag for entry in entries)
    expected_tags = sorted(
        "family/outer/family/inner/image/{}".format(index)
        for index in range(3))
    self.assertEqual(actual_tags, expected_tags)
def test_scalar_summary_with_family(self, scope):
    """Checks tags of two same-named family scalars; the second gets `_1`."""
    scoped = _ScopedSummary(scope)
    with self.test_session() as sess:
        seven = tf.constant(7)
        with tf.name_scope("outer"):
            first_op = scoped.scalar("inner", seven, family="family")
            second_op = scoped.scalar("inner", seven, family="family")
        serialized_pair = sess.run([first_op, second_op])

    expected_tags = (
        "family/outer/family/inner",
        "family/outer/family/inner_1",
    )
    # A single proto is reused; ParseFromString overwrites it each time.
    proto = tf.Summary()
    for serialized, expected_tag in zip(serialized_pair, expected_tags):
        proto.ParseFromString(serialized)
        entries = proto.value
        self.assertLen(entries, 1)
        self.assertEqual(entries[0].tag, expected_tag)
        self.assertEqual(entries[0].simple_value, 7.0)
def test_scalar_summary_with_family(self, scope):
    """Writes two same-named family scalars and checks both tags on disk."""
    scoped = _ScopedSummary(
        self.test_subdirectory, scope=scope, global_step=10)
    with self.test_session() as sess:
        seven = tf.constant(7)
        with tf.name_scope("outer"):
            scoped.scalar("inner", seven, family="family")
            scoped.scalar("inner", seven, family="family")
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        sess.run(scoped.merge_all())
        sess.run(scoped.flush())

    events = self.read_single_event_from_eventfile(scoped)
    for event_index in (0, 1):
        self.assertLen(events[event_index].summary.value, 1)

    expected = {
        "family/outer/family/inner": 7.0,
        "family/outer/family/inner_1": 7.0,
    }
    first_values = (event.summary.value[0] for event in events)
    actual = {value.tag: value.simple_value for value in first_values}
    self.assertEqual(expected, actual)
def test_scope(self, scope):
    """Checks that the `scope` property returns the constructor's scope."""
    summary_under_test = _ScopedSummary(
        self.test_subdirectory,
        scope=scope,
        global_step=10,
    )
    actual_scope = summary_under_test.scope
    self.assertEqual(scope, actual_scope)
def test_scope(self, scope):
    """Checks that the `scope` property returns the constructor's scope."""
    summary_under_test = _ScopedSummary(scope)
    actual_scope = summary_under_test.scope
    self.assertEqual(scope, actual_scope)
def _model_fn(self, features, labels, mode, params):
    """AdaNet model_fn.

    This model_fn is expected to be called four times per iteration. The
    first call is performed in order to build and train an iteration. Once
    that iteration is over, the next two calls freeze its best ensemble for
    training and evaluation. The final call is responsible for loading the
    frozen graph, creating new ops for the next iteration, and overwriting
    the latest checkpoint with its graph and variables, so that the first
    call of the next iteration has the right ops in the checkpoint.

    The following parameters in `params` are expected:

    * freeze_ensemble: Whether to freeze the latest checkpoint's best
      ensemble to a separate checkpoint for the following iteration to use.
    * increment_iteration: Whether to overwrite the current checkpoint with
      the next iteration's graph and initialized weights.

    Args:
      features: Dictionary of `Tensor` objects keyed by feature name.
      labels: `Tensor` of labels.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      params: A dict of parameters.

    Returns:
      A `EstimatorSpec` instance.

    Raises:
      UserWarning: When calling model_fn directly in TRAIN mode.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN
    if training and not self._inside_adanet_training_loop:
        raise UserWarning(
            "The adanet.Estimator's model_fn should not be called directly in "
            "TRAIN mode, because its behavior is undefined outside the context "
            "of its `train` method.")

    # Wrap features so that their ops always have the same names for when
    # freezing and loading ensembles.
    features = self._freezer.wrapped_features(features)

    iteration_number = self._latest_checkpoint_iteration_number()

    # Features are recorded on the first training iteration only; every other
    # call filters the incoming features against that record.
    filtered_features = features
    record_filename = os.path.join(self.model_dir, "features")
    if iteration_number == 0 and training:
        self._record_features(record_filename, features)
    else:
        filtered_features = self._filter_recorded_features(
            record_filename, features)

    # Use the evaluation checkpoint path to get both the iteration number and
    # variable values to avoid any race conditions between the first and second
    # checkpoint reads.
    if mode == tf.estimator.ModeKeys.EVAL and self._evaluation_checkpoint_path:
        iteration_number = tf.contrib.framework.load_variable(
            self._evaluation_checkpoint_path, self._Keys.CURRENT_ITERATION)

    if self._Keys.INCREMENT_ITERATION in params:
        iteration_number += 1

    # `ensemble` is a (weighted_subnetworks, bias) pair; it stays (None, None)
    # when no frozen graph exists for the previous iteration.
    ensemble = (None, None)
    frozen_graph_filename = self._frozen_graph_filename(iteration_number - 1,
                                                        training)
    if tf.gfile.Exists(frozen_graph_filename):
        tf.logging.info(
            "Importing frozen ensemble from %s with features: [%s].",
            frozen_graph_filename, ", ".join(
                sorted(["'{}'".format(f) for f in filtered_features])))
        ensemble = self._freezer.load_frozen_ensemble(
            filename=frozen_graph_filename, features=filtered_features)

    builder_generator = params[self._Keys.SUBNETWORK_GENERATOR]

    # Summaries are skipped entirely in PREDICT mode.
    skip_summaries = mode == tf.estimator.ModeKeys.PREDICT
    previous_ensemble_summary = _ScopedSummary(self._Keys.FROZEN_ENSEMBLE_NAME,
                                               skip_summaries)
    previous_ensemble_reports, all_reports = [], []
    if self._report_materializer:
        previous_ensemble_reports, all_reports = (
            self._collate_subnetwork_reports(iteration_number))

    # NOTE(review): the extent of this variable scope was reconstructed from a
    # whitespace-collapsed source; confirm where the block ends.
    with tf.variable_scope("adanet"):
        previous_weighted_subnetworks, bias = ensemble
        previous_ensemble_spec = None
        if previous_weighted_subnetworks:
            with tf.variable_scope(self._Keys.FROZEN_ENSEMBLE_NAME):
                previous_ensemble_spec = self._ensemble_builder.build_ensemble_spec(
                    name=self._Keys.FROZEN_ENSEMBLE_NAME,
                    weighted_subnetworks=previous_weighted_subnetworks,
                    summary=previous_ensemble_summary,
                    bias=bias,
                    features=features,
                    iteration_step=None,
                    mode=mode,
                    labels=labels)
        previous_ensemble = None
        if previous_ensemble_spec:
            previous_ensemble = previous_ensemble_spec.ensemble
        subnetwork_builders = builder_generator.generate_candidates(
            previous_ensemble=previous_ensemble,
            iteration_number=iteration_number,
            previous_ensemble_reports=previous_ensemble_reports,
            all_reports=all_reports)
        current_iteration = self._iteration_builder.build_iteration(
            iteration_number=iteration_number,
            subnetwork_builders=subnetwork_builders,
            features=features,
            labels=labels,
            mode=mode,
            previous_ensemble_summary=previous_ensemble_summary,
            previous_ensemble_spec=previous_ensemble_spec)

    # Variable which allows us to read the current iteration from a checkpoint.
    iteration_number_tensor = tf.get_variable(
        self._Keys.CURRENT_ITERATION,
        shape=[],
        dtype=tf.int64,
        initializer=tf.zeros_initializer(),
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_VARIABLES])

    adanet_summary = _ScopedSummary("global", skip_summaries)
    adanet_summary.scalar("iteration/adanet/iteration", iteration_number_tensor)
    adanet_summary.scalar("iteration_step/adanet/iteration_step",
                          current_iteration.step)
    if current_iteration.estimator_spec.loss is not None:
        # The same loss tensor is reported under two tags.
        adanet_summary.scalar("loss", current_iteration.estimator_spec.loss)
        adanet_summary.scalar("loss/adanet/adanet_weighted_ensemble",
                              current_iteration.estimator_spec.loss)

    # Re-wrap the iteration's spec so that this method's hooks and summary op
    # are attached to it.
    iteration_estimator_spec = current_iteration.estimator_spec
    estimator_spec = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=iteration_estimator_spec.predictions,
        loss=iteration_estimator_spec.loss,
        train_op=iteration_estimator_spec.train_op,
        eval_metric_ops=iteration_estimator_spec.eval_metric_ops,
        training_hooks=self._training_hooks(current_iteration, training),
        evaluation_hooks=self._evaluation_hooks(current_iteration),
        scaffold=tf.train.Scaffold(summary_op=adanet_summary.merge_all()),
        export_outputs=iteration_estimator_spec.export_outputs)

    # At most one of the following bookkeeping actions runs per call, selected
    # by which key is present in `params`.
    if self._Keys.EVALUATE_ENSEMBLES in params:
        self._best_ensemble_index = self._get_best_ensemble_index(
            current_iteration)
    elif self._Keys.MATERIALIZE_REPORT in params:
        assert self._best_ensemble_index is not None
        self._materialize_report(current_iteration)
    elif self._Keys.FREEZE_ENSEMBLE in params:
        assert self._best_ensemble_index is not None
        new_frozen_graph_filename = self._frozen_graph_filename(
            iteration_number, training)
        tf.logging.info("Freezing best ensemble to %s",
                        new_frozen_graph_filename)
        self._freeze_ensemble(
            filename=new_frozen_graph_filename,
            current_iteration=current_iteration,
            features=features)
    elif self._Keys.INCREMENT_ITERATION in params:
        latest_checkpoint = tf.train.latest_checkpoint(self.model_dir)
        tf.logging.info(
            "Overwriting checkpoint with new graph for iteration %s to %s",
            iteration_number, latest_checkpoint)
        self._overwrite_checkpoint(iteration_number_tensor, iteration_number)

    return estimator_spec
def build_iteration(self,
                    iteration_number,
                    ensemble_candidates,
                    subnetwork_builders,
                    features,
                    mode,
                    labels=None,
                    previous_ensemble_summary=None,
                    previous_ensemble_spec=None,
                    rebuilding=False,
                    params=None):
    """Builds and returns AdaNet iteration t.

    This method uses the candidate subnetworks generated from the ensemble
    at iteration t-1 and creates graph operations to train them. The returned
    `_Iteration` tracks the training of all candidates to know when the
    iteration is over, and tracks the best candidate's predictions and loss,
    as defined by lowest complexity-regularized loss on the train set.

    Args:
      iteration_number: Integer iteration number.
      ensemble_candidates: Iterable of `adanet.ensemble.Candidate` instances.
      subnetwork_builders: A list of `Builders` for adding `Subnetworks` to
        the graph. Each subnetwork is then wrapped in a `_Candidate` to train.
      features: Dictionary of `Tensor` objects keyed by feature name.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      labels: `Tensor` of labels. Can be `None`.
      previous_ensemble_summary: The `_ScopedSummary` for the previous
        ensemble.
      previous_ensemble_spec: Optional `_EnsembleSpec` for iteration t-1.
      rebuilding: Boolean whether the iteration is being rebuilt only to
        restore the previous best subnetworks and ensembles.
      params: The model_fn params.

    Returns:
      An _Iteration instance.

    Raises:
      ValueError: If subnetwork_builders is empty.
      ValueError: If two subnetworks share the same name.
      ValueError: If two ensembles share the same name.
    """
    tf.logging.info("%s iteration %s",
                    "Rebuilding" if rebuilding else "Building",
                    iteration_number)

    if not subnetwork_builders:
        raise ValueError("Each iteration must have at least one Builder.")

    # TODO: Consider moving builder mode logic to ensemble_builder.py.
    builder_mode = mode
    if rebuilding:
        # Build the subnetworks and ensembles in EVAL mode by default. This way
        # their outputs aren't affected by dropout etc.
        builder_mode = tf.estimator.ModeKeys.EVAL
        if mode == tf.estimator.ModeKeys.PREDICT:
            builder_mode = mode

        # Only replicate in training mode when the user requests it.
        if self._replicate_ensemble_in_training and (
                mode == tf.estimator.ModeKeys.TRAIN):
            builder_mode = mode

    training = mode == tf.estimator.ModeKeys.TRAIN
    skip_summaries = mode == tf.estimator.ModeKeys.PREDICT
    with tf.variable_scope("iteration_{}".format(iteration_number)):
        # Iteration step to use instead of global step.
        iteration_step = tf.get_variable(
            "step",
            shape=[],
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.int64)

        # Convert to tensor so that users cannot mutate it.
        iteration_step_tensor = tf.convert_to_tensor(iteration_step)

        seen_builder_names = {}
        candidates = []
        summaries = []
        subnetwork_reports = {}
        previous_ensemble = None

        if previous_ensemble_spec:
            previous_ensemble = previous_ensemble_spec.ensemble
            # Include previous best subnetwork as a candidate so that its
            # predictions are returned until a new candidate outperforms.
            seen_builder_names = {previous_ensemble_spec.name: True}
            previous_best_candidate = self._candidate_builder.build_candidate(
                ensemble_spec=previous_ensemble_spec,
                training=training,
                iteration_step=iteration_step_tensor,
                summary=previous_ensemble_summary,
                is_previous_best=True)
            candidates.append(previous_best_candidate)
            summaries.append(previous_ensemble_summary)

            # Generate subnetwork reports.
            if mode == tf.estimator.ModeKeys.EVAL:
                metrics = {}
                if previous_ensemble_spec.eval_metrics is not None:
                    metric_fn, kwargs = previous_ensemble_spec.eval_metrics
                    metrics = metric_fn(**kwargs)
                subnetwork_report = subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics=metrics,
                )
                subnetwork_report.metrics["adanet_loss"] = tf.metrics.mean(
                    previous_ensemble_spec.adanet_loss)
                subnetwork_reports["previous_ensemble"] = subnetwork_report

        # Reject duplicate builder names (including a clash with the previous
        # ensemble's name) before any subnetwork is built.
        for subnetwork_builder in subnetwork_builders:
            if subnetwork_builder.name in seen_builder_names:
                raise ValueError(
                    "Two subnetworks have the same name '{}'".format(
                        subnetwork_builder.name))
            seen_builder_names[subnetwork_builder.name] = True

        subnetwork_specs = []
        for subnetwork_builder in subnetwork_builders:
            subnetwork_name = "t{}_{}".format(iteration_number,
                                              subnetwork_builder.name)
            subnetwork_summary = _ScopedSummary(
                namespace="subnetwork",
                scope=subnetwork_name,
                skip_summary=skip_summaries or rebuilding)
            summaries.append(subnetwork_summary)
            subnetwork_spec = self._subnetwork_manager.build_subnetwork_spec(
                name=subnetwork_name,
                subnetwork_builder=subnetwork_builder,
                iteration_step=iteration_step_tensor,
                summary=subnetwork_summary,
                features=features,
                mode=builder_mode,
                labels=labels,
                previous_ensemble=previous_ensemble,
                params=params)
            subnetwork_specs.append(subnetwork_spec)
            # Generate subnetwork reports.
            if mode != tf.estimator.ModeKeys.PREDICT:
                subnetwork_report = subnetwork_builder.build_subnetwork_report()
                if not subnetwork_report:
                    # Fall back to an empty report when the builder returns
                    # a falsy value.
                    subnetwork_report = subnetwork.Report(
                        hparams={}, attributes={}, metrics={})
                if subnetwork_spec.eval_metrics is not None:
                    metrics_fn, kwargs = subnetwork_spec.eval_metrics
                    metrics = metrics_fn(**kwargs)
                    # Iterate in sorted order so metrics are added
                    # deterministically.
                    for metric_name in sorted(metrics):
                        metric = metrics[metric_name]
                        subnetwork_report.metrics[metric_name] = metric
                subnetwork_reports[
                    subnetwork_builder.name] = subnetwork_report

        # Create (ensembler_candidate*ensembler) ensembles.
        seen_ensemble_names = {}
        for ensembler in self._ensemblers:
            for ensemble_candidate in ensemble_candidates:
                ensemble_name = "t{}_{}_{}".format(iteration_number,
                                                   ensemble_candidate.name,
                                                   ensembler.name)
                if ensemble_name in seen_ensemble_names:
                    raise ValueError(
                        "Two ensembles have the same name '{}'".format(
                            ensemble_name))
                seen_ensemble_names[ensemble_name] = True
                summary = _ScopedSummary(
                    namespace="ensemble",
                    scope=ensemble_name,
                    skip_summary=skip_summaries or rebuilding)
                summaries.append(summary)
                ensemble_spec = self._ensemble_builder.build_ensemble_spec(
                    name=ensemble_name,
                    candidate=ensemble_candidate,
                    ensembler=ensembler,
                    subnetwork_specs=subnetwork_specs,
                    summary=summary,
                    features=features,
                    mode=builder_mode,
                    iteration_step=iteration_step_tensor,
                    iteration_number=iteration_number,
                    labels=labels,
                    previous_ensemble_spec=previous_ensemble_spec,
                    params=params)
                candidate = self._candidate_builder.build_candidate(
                    ensemble_spec=ensemble_spec,
                    training=training,
                    iteration_step=iteration_step_tensor,
                    summary=summary)
                candidates.append(candidate)
                # TODO: Move adanet_loss from subnetwork report to a new
                # ensemble report, since the adanet_loss is associated with an
                # ensemble, and only when using a ComplexityRegularizedEnsemblers.
                # Keep adanet_loss in subnetwork report for backwards compatibility.
                # The adanet_loss is only attached when there are as many
                # ensemble candidates as builders, the candidate wraps a single
                # builder, and the mode is not PREDICT.
                if len(ensemble_candidates) != len(subnetwork_builders):
                    continue
                if len(ensemble_candidate.subnetwork_builders) > 1:
                    continue
                if mode == tf.estimator.ModeKeys.PREDICT:
                    continue
                builder_name = ensemble_candidate.subnetwork_builders[0].name
                subnetwork_reports[builder_name].metrics[
                    "adanet_loss"] = tf.metrics.mean(ensemble_spec.adanet_loss)

        # Dynamically select the outputs of best candidate.
        best_candidate_index = self._best_candidate_index(candidates)
        best_predictions = self._best_predictions(candidates,
                                                  best_candidate_index)
        best_loss = self._best_loss(candidates, best_candidate_index, mode)
        best_eval_metrics = self._create_best_eval_metrics_tuple(
            candidates, subnetwork_specs, best_candidate_index, mode, params)
        best_export_outputs = self._best_export_outputs(
            candidates, best_candidate_index, mode, best_predictions)
        # Hooks on TPU cannot depend on any graph `Tensors`. Instead the value of
        # `is_over` is stored in a `Variable` that can later be retrieved from
        # inside a training hook.
        is_over_var_template = tf.make_template("is_over_var_template",
                                                _is_over_var)
        # Collect hooks from every subnetwork and ensemble train op; a missing
        # hook tuple is treated as empty.
        training_chief_hooks, training_hooks = (), ()
        for subnetwork_spec in subnetwork_specs:
            if not subnetwork_spec.train_op:
                continue
            training_chief_hooks += subnetwork_spec.train_op.chief_hooks or ()
            training_hooks += subnetwork_spec.train_op.hooks or ()
        for candidate in candidates:
            spec = candidate.ensemble_spec
            if not spec.train_op:
                continue
            training_chief_hooks += spec.train_op.chief_hooks or ()
            training_hooks += spec.train_op.hooks or ()
        summary = _ScopedSummary(
            namespace=None,
            scope=None,
            skip_summary=skip_summaries or rebuilding)
        summaries.append(summary)
        with summary.current_scope():
            summary.scalar("iteration/adanet/iteration", iteration_number)
            summary.scalar("iteration_step/adanet/iteration_step",
                           iteration_step_tensor)
            if best_loss is not None:
                summary.scalar("loss", best_loss)
        # NOTE(review): `eval_metric_ops` computed here is never read; the TPU
        # branch passes `best_eval_metrics` directly and the non-TPU branch
        # recomputes it, so `metric_fn` is invoked twice. Confirm whether this
        # first call is intentional.
        eval_metric_ops = None
        if best_eval_metrics is not None:
            metric_fn, kwargs = best_eval_metrics
            eval_metric_ops = metric_fn(**kwargs)
        train_op = self._create_train_op(subnetwork_specs, candidates, mode,
                                         iteration_step, is_over_var_template)
        if self._use_tpu:
            # The TPU spec receives the (metric_fn, kwargs) tuple rather than
            # built metric ops, and no chief hooks are passed to it.
            estimator_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions=best_predictions,
                loss=best_loss,
                train_op=train_op,
                eval_metrics=best_eval_metrics,
                export_outputs=best_export_outputs,
                training_hooks=training_hooks)
        else:
            eval_metric_ops = None
            if best_eval_metrics is not None:
                metric_fn, kwargs = best_eval_metrics
                eval_metric_ops = metric_fn(**kwargs)
            estimator_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=best_predictions,
                loss=best_loss,
                train_op=train_op,
                eval_metric_ops=eval_metric_ops,
                export_outputs=best_export_outputs,
                training_chief_hooks=training_chief_hooks,
                training_hooks=training_hooks)

        return _Iteration(
            number=iteration_number,
            candidates=candidates,
            subnetwork_specs=subnetwork_specs,
            estimator_spec=estimator_spec,
            best_candidate_index=best_candidate_index,
            summaries=summaries,
            is_over_fn=is_over_var_template,
            subnetwork_reports=subnetwork_reports,
            step=iteration_step_tensor)
def build_iteration(self,
                    iteration_number,
                    subnetwork_builders,
                    features,
                    mode,
                    labels=None,
                    previous_ensemble_summary=None,
                    previous_ensemble_spec=None,
                    rebuilding=False):
    """Constructs the graph for AdaNet iteration t and returns it.

    One candidate is created per entry in `subnetwork_builders` by appending
    the builder's subnetwork to the ensemble from iteration t-1. When a
    previous ensemble is supplied it is also registered as a candidate, so
    that its predictions are returned until a new candidate outperforms it.
    The returned `_Iteration` tracks the training of all candidates to know
    when the iteration is over, and tracks the best candidate's predictions
    and loss, as defined by lowest complexity-regularized loss on the train
    set.

    Args:
      iteration_number: Integer iteration number.
      subnetwork_builders: A list of `Builders` for adding `Subnetworks` to
        the graph. Each subnetwork is then wrapped in a `_Candidate` to
        train.
      features: Dictionary of `Tensor` objects keyed by feature name.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      labels: `Tensor` of labels. Can be `None`.
      previous_ensemble_summary: The `_ScopedSummary` for the previous
        ensemble.
      previous_ensemble_spec: Optional `_EnsembleSpec` for iteration t-1.
      rebuilding: Boolean whether the iteration is being rebuilt only to
        restore the previous best subnetworks and ensembles.

    Returns:
      An _Iteration instance.

    Raises:
      ValueError: If subnetwork_builders is empty.
      ValueError: If two `Builder` instances share the same name.
    """
    action = "Rebuilding" if rebuilding else "Building"
    tf.logging.info("%s iteration %s", action, iteration_number)

    if not subnetwork_builders:
        raise ValueError("Each iteration must have at least one Builder.")

    mode_keys = tf.estimator.ModeKeys
    is_training = mode == mode_keys.TRAIN
    is_predicting = mode == mode_keys.PREDICT

    # TODO: Consider moving ensemble mode logic to ensemble.py.
    # A rebuilt (frozen) ensemble is created in EVAL mode so that its outputs
    # aren't affected by dropout etc. The caller's mode is kept for PREDICT,
    # and for TRAIN when the user asked to replicate the ensemble in training.
    replicate_in_training = (
        self._replicate_ensemble_in_training and is_training)
    if rebuilding and not (is_predicting or replicate_in_training):
        ensemble_mode = mode_keys.EVAL
    else:
        ensemble_mode = mode

    # Value handed to every new `_ScopedSummary` as `skip_summary`.
    skip_summary = is_predicting or rebuilding

    with tf.variable_scope("iteration_{}".format(iteration_number)):
        # Iteration step to use instead of global step.
        iteration_step = tf.get_variable(
            "step",
            shape=[],
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.int64)

        # Convert to tensor so that users cannot mutate it.
        iteration_step_tensor = tf.convert_to_tensor(iteration_step)

        used_names = set()
        candidates = []
        summaries = []
        subnetwork_reports = {}

        # TODO: Consolidate building subnetwork into
        # candidate_builder.
        if previous_ensemble_spec:
            # The previous best ensemble competes as a candidate, and its
            # name is reserved so that no new builder can reuse it.
            used_names.add(previous_ensemble_spec.name)
            candidates.append(
                self._candidate_builder.build_candidate(
                    ensemble_spec=previous_ensemble_spec,
                    training=is_training,
                    iteration_step=iteration_step_tensor,
                    summary=previous_ensemble_summary,
                    is_previous_best=True))
            summaries.append(previous_ensemble_summary)

            # The previous ensemble is only reported on during evaluation.
            if mode == mode_keys.EVAL:
                previous_metric_ops = previous_ensemble_spec.eval_metric_ops
                previous_report = subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics=({} if previous_metric_ops is None else
                             previous_metric_ops.copy()),
                )
                previous_report.metrics["adanet_loss"] = tf.metrics.mean(
                    previous_ensemble_spec.adanet_loss)
                subnetwork_reports["previous_ensemble"] = previous_report

        for builder in subnetwork_builders:
            builder_name = builder.name
            if builder_name in used_names:
                raise ValueError(
                    "Two ensembles have the same name '{}'".format(
                        builder_name))
            used_names.add(builder_name)

            ensemble_name = "t{}_{}".format(iteration_number, builder_name)
            summary = _ScopedSummary(ensemble_name,
                                     skip_summary=skip_summary)
            summaries.append(summary)

            ensemble_spec = self._ensemble_builder.append_new_subnetwork(
                ensemble_name=ensemble_name,
                ensemble_spec=previous_ensemble_spec,
                iteration_number=iteration_number,
                subnetwork_builder=builder,
                summary=summary,
                features=features,
                mode=ensemble_mode,
                iteration_step=iteration_step_tensor,
                labels=labels)
            candidates.append(
                self._candidate_builder.build_candidate(
                    ensemble_spec=ensemble_spec,
                    training=is_training,
                    iteration_step=iteration_step_tensor,
                    summary=summary))

            # No subnetwork reports are generated in PREDICT mode.
            if is_predicting:
                continue

            # Fall back to an empty report when the builder provides none.
            report = builder.build_subnetwork_report() or subnetwork.Report(
                hparams={}, attributes={}, metrics={})
            metric_ops = ensemble_spec.eval_metric_ops
            if metric_ops is not None:
                for metric_name in sorted(metric_ops):
                    report.metrics[metric_name] = metric_ops[metric_name]
            report.metrics["adanet_loss"] = tf.metrics.mean(
                ensemble_spec.adanet_loss)
            subnetwork_reports[builder_name] = report

        # Dynamically select the outputs of best candidate.
        best_candidate_index = self._best_candidate_index(candidates)
        best_predictions = self._best_predictions(candidates,
                                                  best_candidate_index)
        best_loss = self._best_loss(candidates, best_candidate_index, mode)
        best_eval_metric_ops = self._best_eval_metric_ops(
            candidates, best_candidate_index, mode)
        best_export_outputs = self._best_export_outputs(
            candidates, best_candidate_index, mode, best_predictions)

        # Hooks on TPU cannot depend on any graph `Tensors`. Instead the
        # value of `is_over` is stored in a `Variable` that can later be
        # retrieved from inside a training hook.
        is_over_var_fn = tf.make_template("is_over_var_fn", is_over_var)
        train_op = self._create_train_op(candidates, mode, iteration_step,
                                         is_over_var_fn)
        estimator_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=best_predictions,
            loss=best_loss,
            train_op=train_op,
            eval_metric_ops=best_eval_metric_ops,
            export_outputs=best_export_outputs)

        return _Iteration(number=iteration_number,
                          candidates=candidates,
                          estimator_spec=estimator_spec,
                          best_candidate_index=best_candidate_index,
                          summaries=summaries,
                          is_over_fn=is_over_var_fn,
                          subnetwork_reports=subnetwork_reports,
                          step=iteration_step_tensor)
def build_iteration(self,
                    iteration_number,
                    subnetwork_builders,
                    features,
                    mode,
                    labels=None,
                    previous_ensemble_summary=None,
                    previous_ensemble_spec=None):
    """Builds and returns AdaNet iteration t.

    This method uses the generated candidate subnetworks given the ensemble
    at iteration t-1 and creates graph operations to train them. The
    returned `_Iteration` tracks the training of all candidates to know when
    the iteration is over, and tracks the best candidate's predictions and
    loss, as defined by lowest complexity-regularized loss on the train set.

    Args:
      iteration_number: The iteration number.
      subnetwork_builders: A list of `Builders` for adding `Subnetworks` to
        the graph. Each subnetwork is then wrapped in a `_Candidate` to
        train.
      features: Dictionary of `Tensor` objects keyed by feature name.
      mode: Defines whether this is training, evaluation or prediction. See
        `ModeKeys`.
      labels: `Tensor` of labels. Can be `None`.
      previous_ensemble_summary: The `_ScopedSummary` for the previous
        ensemble.
      previous_ensemble_spec: Optional `_EnsembleSpec` for iteration t-1.

    Returns:
      An _Iteration instance.

    Raises:
      ValueError: If subnetwork_builders is empty.
      ValueError: If two `Builder` instances share the same name.
    """
    if not subnetwork_builders:
        raise ValueError("Each iteration must have at least one Builder.")

    training = mode == tf.estimator.ModeKeys.TRAIN
    skip_summaries = mode == tf.estimator.ModeKeys.PREDICT
    with tf.variable_scope("iteration_{}".format(iteration_number)):
        # Iteration step to use instead of global step.
        iteration_step = tf.get_variable(
            "step",
            shape=[],
            initializer=tf.zeros_initializer(),
            trainable=False,
            dtype=tf.int64)

        # Convert to tensor so that users cannot mutate it.
        iteration_step_tensor = tf.convert_to_tensor(iteration_step)

        # Only membership is ever checked, so a set is sufficient.
        seen_builder_names = set()
        candidates = []
        summaries = []
        subnetwork_reports = {}

        # TODO: Consolidate building subnetwork into
        # candidate_builder.
        if previous_ensemble_spec:
            # Include previous best subnetwork as a candidate so that its
            # predictions are returned until a new candidate outperforms.
            seen_builder_names.add(previous_ensemble_spec.name)
            previous_best_candidate = self._candidate_builder.build_candidate(
                ensemble_spec=previous_ensemble_spec,
                training=training,
                iteration_step=iteration_step_tensor,
                summary=previous_ensemble_summary,
                is_previous_best=True)
            candidates.append(previous_best_candidate)
            summaries.append(previous_ensemble_summary)

            # Generate subnetwork reports.
            if mode != tf.estimator.ModeKeys.PREDICT:
                subnetwork_report = subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics=(previous_ensemble_spec.eval_metric_ops.copy()
                             if previous_ensemble_spec.eval_metric_ops
                             is not None else {}),
                )
                subnetwork_report.metrics["adanet_loss"] = tf.metrics.mean(
                    previous_ensemble_spec.adanet_loss)
                subnetwork_reports["previous_ensemble"] = subnetwork_report

        for subnetwork_builder in subnetwork_builders:
            if subnetwork_builder.name in seen_builder_names:
                raise ValueError(
                    "Two ensembles have the same name '{}'".format(
                        subnetwork_builder.name))
            seen_builder_names.add(subnetwork_builder.name)

            summary = _ScopedSummary(subnetwork_builder.name,
                                     skip_summary=skip_summaries)
            summaries.append(summary)
            ensemble_spec = self._ensemble_builder.append_new_subnetwork(
                ensemble_spec=previous_ensemble_spec,
                subnetwork_builder=subnetwork_builder,
                summary=summary,
                features=features,
                mode=mode,
                iteration_step=iteration_step_tensor,
                labels=labels)
            candidate = self._candidate_builder.build_candidate(
                ensemble_spec=ensemble_spec,
                training=training,
                iteration_step=iteration_step_tensor,
                summary=summary)
            candidates.append(candidate)

            # Generate subnetwork reports.
            if mode != tf.estimator.ModeKeys.PREDICT:
                subnetwork_report = (
                    subnetwork_builder.build_subnetwork_report())
                if not subnetwork_report:
                    # The builder supplied no report; start from an empty one.
                    subnetwork_report = subnetwork.Report(hparams={},
                                                          attributes={},
                                                          metrics={})
                metric_ops = ensemble_spec.eval_metric_ops
                if metric_ops is not None:
                    # Insert metrics in sorted name order so the report is
                    # filled in the same order on every run.
                    for metric_name in sorted(metric_ops):
                        subnetwork_report.metrics[metric_name] = (
                            metric_ops[metric_name])
                subnetwork_report.metrics["adanet_loss"] = tf.metrics.mean(
                    ensemble_spec.adanet_loss)
                subnetwork_reports[
                    subnetwork_builder.name] = subnetwork_report

        # Dynamically select the outputs of best candidate. `candidates` is
        # never empty here: `subnetwork_builders` has at least one entry and
        # every loop pass above either appends a candidate or raises, so no
        # fallback to `candidates[0]` is needed.
        best_candidate_index = self._best_candidate_index(candidates)
        best_predictions = self._best_predictions(candidates,
                                                  best_candidate_index)
        best_loss = self._best_loss(candidates, best_candidate_index, mode)
        best_eval_metric_ops = self._best_eval_metric_ops(
            candidates, best_candidate_index)
        best_export_outputs = self._best_export_outputs(
            candidates, best_candidate_index, mode, best_predictions)

        estimator_spec = tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=best_predictions,
            loss=best_loss,
            train_op=self._create_train_op(candidates, mode, iteration_step),
            eval_metric_ops=best_eval_metric_ops,
            export_outputs=best_export_outputs)

        return _Iteration(number=iteration_number,
                          candidates=candidates,
                          estimator_spec=estimator_spec,
                          best_candidate_index=best_candidate_index,
                          summaries=summaries,
                          is_over=self._is_over(candidates),
                          subnetwork_reports=subnetwork_reports,
                          step=iteration_step_tensor)