def metric_fn(loss_value, label_ids, log_probs):
  """Builds the evaluation metrics for a classification output.

  Args:
    loss_value: Values fed to the streaming mean reported as the loss.
    label_ids: Ground-truth class ids; cast to int64 for the top-k metrics.
    log_probs: Per-class scores; the argmax over the last axis is used as the
      predicted class, and the raw scores feed the top-k metrics.

  Returns:
    A dict keyed by metric name. "P@1" and "R@1" hold (value, update_op)
    pairs, "f1@1" pairs the derived F1 value with a no-op update, and the
    accuracy/loss entries hold whatever the tf.compat.v1 metric functions
    returned.
  """
  mean_loss = tf.compat.v1.metrics.mean(values=loss_value)

  predicted_ids = tf.argmax(log_probs, axis=-1, output_type=tf.int32)
  acc = tf.compat.v1.metrics.accuracy(
      labels=label_ids, predictions=predicted_ids)

  precision, precision_update = tf.compat.v1.metrics.precision_at_k(
      labels=tf.cast(label_ids, tf.int64), predictions=log_probs, k=1)
  recall, recall_update = tf.compat.v1.metrics.recall_at_k(
      labels=tf.cast(label_ids, tf.int64), predictions=log_probs, k=1)

  # F1 is derived from the precision/recall value tensors, so it has no
  # update op of its own.
  f1_value = tf.math.divide_no_nan(2 * precision * recall, precision + recall)

  metrics = {}
  metrics["P@1"] = (precision, precision_update)
  metrics["R@1"] = (recall, recall_update)
  metrics["f1@1"] = (f1_value, tf.no_op())
  metrics["classification_accuracy"] = acc
  metrics["classification_loss"] = mean_loss
  return metrics
def assert_shape_equal(shape_a, shape_b):
  """Asserts that shape_a and shape_b are equal.

  If the shapes are static, raises a ValueError when the shapes mismatch.

  If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
  mismatch.

  Static shapes are compared dimension by dimension, so a list and a tuple
  holding the same dimensions are treated as equal.

  Args:
    shape_a: a list (or tuple) containing shape of the first tensor.
    shape_b: a list (or tuple) containing shape of the second tensor.

  Returns:
    A tf.no_op() when the shapes are all static and equal, or a
    tf.assert_equal() op when any dimension is not a Python int.

  Raises:
    ValueError: When shapes are both static and unequal.
  """
  both_static = (all(isinstance(dim, int) for dim in shape_a) and
                 all(isinstance(dim, int) for dim in shape_b))
  if not both_static:
    return tf.assert_equal(shape_a, shape_b)

  # Compare as lists: in Python `[1, 2] != (1, 2)` is True, which would
  # otherwise report identical static shapes as unequal when the two
  # containers differ in type.
  if list(shape_a) != list(shape_b):
    raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
  return tf.no_op()
def _apply_gradients_cross_replica(self, distribution, grads_and_vars, name,
                                   experimental_aggregate_gradients):
  """Updates the loss scale and conditionally applies the gradients.

  Args:
    distribution: Strategy object whose `extended.call_for_each_replica` is
      used to apply the gradients.
    grads_and_vars: Iterable of (gradient, variable) pairs.
    name: Forwarded to `self._apply_gradients`.
    experimental_aggregate_gradients: Forwarded to `self._apply_gradients`.

  Returns:
    A grouped op covering the conditional apply and the loss scale update.
  """
  grads = [grad for grad, _ in grads_and_vars]

  if not isinstance(self._loss_scale, _DynamicLossScaleState):
    # Without a dynamic loss scale there is nothing to update and the
    # gradients are always applied.
    loss_scale_update_op, should_apply_grads = tf.no_op(), True
  else:
    loss_scale_update_op, should_apply_grads = self._loss_scale.update(grads)

  def _apply():
    # We do not want DistributionStrategy to unwrap any MirroredVariables in
    # grads_and_vars, because even in a replica context, the wrapped optimizer
    # expects mirrored variables. So we wrap the variables with an
    # _UnwrapPreventer, preventing DistributionStrategy from unwrapping the
    # MirroredVariables.
    guarded_vars = _UnwrapPreventer([var for _, var in grads_and_vars])
    replica_args = (grads, guarded_vars, name,
                    experimental_aggregate_gradients)
    return distribution.extended.call_for_each_replica(
        self._apply_gradients, args=replica_args)

  def _skip():
    # Normally self._optimizer.iterations is incremented in
    # self._optimizer.apply_gradients(). Since that is not called in this
    # branch, we increment it here instead.
    return self._optimizer.iterations.assign_add(1, read_value=False)

  # Note: We must call this cond() in a cross-replica context.
  # DistributionStrategy does not support having a cond in a replica context
  # with a branch that calls `merge_call`, and self._optimizer.apply_gradients
  # calls `merge_call`.
  maybe_apply_op = smart_cond.smart_cond(should_apply_grads, _apply, _skip)
  return tf.group(maybe_apply_op, loss_scale_update_op)
def testDofChangeError(self):
  """Checks Chain's handling of event-size changes in the middle of a chain."""
  exp = tfb.Exp()
  smc = tfb.SoftmaxCentered()
  x = [1., 2., 3.]

  def _eval_fldj(bijector):
    return self.evaluate(bijector.forward_log_det_jacobian(x, 1))

  # Increase in event-size is the last step. No problems here.
  safe_bij = tfb.Chain(
      [smc, exp], validate_args=True, validate_event_size=True)
  _eval_fldj(safe_bij)

  # Increase in event-size before Exp.
  raise_bij = tfb.Chain(
      [exp, smc], validate_args=True, validate_event_size=True)
  expected_errors = (ValueError, tf.errors.InvalidArgumentError)
  with self.assertRaisesRegex(expected_errors, r".+degrees of freedom.+"):
    _eval_fldj(raise_bij)

  # When validate_args is False, warns instead of raising.
  warn_bij = tfb.Chain(
      [exp, smc], validate_args=False, validate_event_size=True)
  with mock.patch.object(tf, "print", return_value=tf.no_op()) as mock_print:
    _eval_fldj(warn_bij)
    print_args, _ = mock_print.call_args
    self.assertRegex(print_args[0], r"WARNING:.+degrees of freedom")

  # When validate_event_size is False, neither warns nor raises.
  ignore_bij = tfb.Chain([exp, smc], validate_event_size=False)
  _eval_fldj(ignore_bij)
def test_increment_global_step(self, use_parameter_scaled_training,
                               train_steps, maximum_depth, architecture,
                               expected):
  """Runs `_increment_global_step` once and checks the resulting step value."""
  # Force graph mode
  with tf.compat.v1.Graph().as_default(), \
      self.test_session(graph=tf.Graph()) as sess:
    tower_name = "tower"
    if architecture is not None:
      architecture_utils.set_architecture(architecture, tower_name=tower_name)

    spec = self._create_phoenix_spec(problem_type="cnn")
    spec.maximum_depth = maximum_depth
    spec.use_parameter_scaled_training = use_parameter_scaled_training
    instance = phoenix.Phoenix(
        phoenix_spec=spec,
        input_layer_fn=lambda: None,
        logits_dimension=0,
        study_name="test",
        study_owner="test")

    global_step = tf.compat.v1.train.get_or_create_global_step()
    for initializer in (tf.compat.v1.global_variables_initializer,
                        tf.compat.v1.local_variables_initializer):
      sess.run(initializer())

    step_before = sess.run(global_step)
    increment_op = instance._increment_global_step(
        train_op=tf.no_op(), train_steps=train_steps, tower_name=tower_name)
    sess.run(increment_op)
    step_after = sess.run(global_step)

    self.assertEqual(step_before, 0)
    self.assertEqual(step_after, expected)
def _architecture_metric_fn():
  """Manually creates the tf.metric with a serialized tf.Summary proto."""
  # TODO: Should architecture.subnetworks be sorted by iteration
  # number first? Or perhaps, to make this more general, to have one line
  # for each iteration, with "|" as a delimiter if there are multiple
  # subnetworks in one iteration? Something like:
  # 0 linear
  # 1 dnn_width_32_depth_1 | dnn_width_64_depth_1
  # 2
  # 3 dnn_with_32_depth_2
  # Also consider adding ensemble candidate's name, though that is already
  # included in the ensemble name.
  subnetwork_names = (name for _, name in architecture.subnetworks)
  architecture_ = "| {} |".format(" | ".join(subnetwork_names))

  # Tag the summary with the "text" plugin.
  text_plugin = tf_compat.v1.SummaryMetadata.PluginData(plugin_name="text")
  summary_metadata = tf_compat.v1.SummaryMetadata(plugin_data=text_plugin)

  summary_proto = tf_compat.v1.summary.Summary()
  summary_proto.value.add(
      metadata=summary_metadata,
      tag="architecture/adanet",
      tensor=tf_compat.v1.make_tensor_proto(architecture_, dtype=tf.string))

  architecture_summary = tf.convert_to_tensor(
      value=summary_proto.SerializeToString(), name="architecture")
  # The serialized summary is constant, so the update op is a no-op.
  return {"architecture/adanet/ensembles": (architecture_summary, tf.no_op())}
def _decay_weights_op(self, var, learning_rate, apply_state):
  """Returns the op that applies weight decay to `var`, or a no-op.

  Args:
    var: Variable to decay; its name decides whether decay is applied.
    learning_rate: Multiplier applied to the decay term.
    apply_state: Dict keyed by (device, base dtype) whose entries hold a
      'weight_decay_rate' value.

  Returns:
    The `assign_sub` result when decay applies to `var`, else `tf.no_op()`.
  """
  if not self._do_use_weight_decay(var.name):
    return tf.no_op()

  state_key = (var.device, var.dtype.base_dtype)
  decay_rate = apply_state[state_key]['weight_decay_rate']
  return var.assign_sub(
      learning_rate * var * decay_rate, use_locking=self._use_locking)
def build_mixture_weights_train_op(self, loss, var_list, logits, labels,
                                   iteration_step, summary):
  """See `adanet.subnetwork.Builder`."""
  if self._learn_mixture_weights:
    # NOTE: The `adanet.Estimator` increments the global step.
    return self._optimizer.minimize(loss=loss, var_list=var_list)
  # Mixture weights are not being learned, so there is nothing to train.
  return tf.no_op("mixture_weights_train_op")
def _resource_apply_dense(self, grad, param, apply_state=None):
  """Builds the momentum update for one dense parameter.

  Args:
    grad: Gradient for `param`, or None.
    param: Variable to update, or None.
    apply_state: Optional dict keyed by (device, base dtype) holding
      coefficients; the fallback state is used when no entry is found.

  Returns:
    A no-op when `grad` or `param` is None, otherwise a grouped op assigning
    the new parameter value and the new "Momentum" slot value.
  """
  if grad is None or param is None:
    return tf.no_op()

  device, dtype = param.device, param.dtype.base_dtype
  coefficients = ((apply_state or {}).get((device, dtype)) or
                  self._fallback_apply_state(device, dtype))
  lr = coefficients["lr_t"]

  name = param.name
  velocity = self.get_slot(param, "Momentum")

  if self._use_weight_decay(name):
    grad += self.weight_decay * param

  def _layer_trust_ratio(step):
    """Ratio scaling the learning rate by the norms of `param` and `step`."""
    if not self._do_layer_adaptation(name):
      return 1.0
    w_norm = tf.norm(param, ord=2)
    step_norm = tf.norm(step, ord=2)
    # Fall back to 1.0 whenever either norm is not strictly positive.
    return tf.where(
        tf.greater(w_norm, 0),
        tf.where(
            tf.greater(step_norm, 0), (self.eeta * w_norm / step_norm), 1.0),
        1.0)

  if self.classic_momentum:
    # The trust ratio is computed from the gradient and folded into the
    # learning rate before the momentum accumulation.
    scaled_lr = lr * _layer_trust_ratio(grad)
    next_v = tf.multiply(self.momentum, velocity) + scaled_lr * grad
    if self.use_nesterov:
      update = tf.multiply(self.momentum, next_v) + scaled_lr * grad
    else:
      update = next_v
    next_param = param - update
  else:
    # The trust ratio is computed from the accumulated update instead.
    next_v = tf.multiply(self.momentum, velocity) + grad
    if self.use_nesterov:
      update = tf.multiply(self.momentum, next_v) + grad
    else:
      update = next_v
    scaled_lr = _layer_trust_ratio(update) * lr
    next_param = param - scaled_lr * update

  param_update = param.assign(next_param, use_locking=False)
  velocity_update = velocity.assign(next_v, use_locking=False)
  return tf.group(param_update, velocity_update)
def _create_host_call(self, current_iteration, training):
  """Construct a host_call writing scalar summaries.

  Args:
    current_iteration: The current `_Iteration`.
    training: Boolean indicating whether in training mode.

  Returns:
    (fn, args) Pair to be called by TPUEstimator as the host_call. When not
    training, the function returns a single no-op and the args are empty.
  """
  if not training:
    return lambda **kwargs: [tf.no_op()], {}

  # Collect and flatten summary functions and arguments.
  summary_kwargs = collections.OrderedDict()
  # Use the compat.v1 endpoint, as the rest of this function does:
  # `tf.train.get_global_step` is not exposed on the TF2 `tf.train` namespace.
  gs_t = tf.reshape(
      tf.cast(tf.compat.v1.train.get_global_step(), dtype=tf.int32), [1])
  summary_kwargs["global_step"] = gs_t

  # summary_fns[i][j] pairs with the tensor stored under "summary_{i}_{j}".
  summary_fns = collections.defaultdict(list)
  for i, summary in enumerate(current_iteration.summaries):
    for j, (summary_fn, tensor) in enumerate(summary.summary_tuples()):
      summary_fns[i].append(summary_fn)
      summary_kwargs["summary_{}_{}".format(i, j)] = tensor

  def _host_call_fn(**kwargs):
    """Training host call. Creates summaries for training metrics.

    Args:
      **kwargs: Dict of {str: Tensor} , with `Tensor` of shape `[batch]`. Must
        contain key "global_step" with value of current global_step Tensor.

    Returns:
      List of summary ops to run on the CPU host.
    """
    from tensorflow.python.ops import summary_ops_v2  # pylint: disable=g-direct-tensorflow-import,g-import-not-at-top

    gs = tf.cast(kwargs.pop("global_step")[0], dtype=tf.int64)
    for i, summary in enumerate(current_iteration.summaries):
      with summary_ops_v2.create_file_writer(summary.logdir).as_default():
        with summary_ops_v2.record_summaries_every_n_global_steps(
            n=self.config.save_summary_steps, global_step=gs):
          for j, summary_fn in enumerate(summary_fns[i]):
            tensor = kwargs["summary_{}_{}".format(i, j)]
            summary_fn(tensor, step=gs)
      summary.clear_summary_tuples()
    return tf.compat.v1.summary.all_v2_summary_ops()

  return _host_call_fn, summary_kwargs
def _create_estimator_spec(head, features, labels, mode, logits, use_tpu):
  """Creates the head's EstimatorSpec or TPUEstimatorSpec on TPU."""
  # pylint: disable=protected-access
  spec_factory = (
      head._create_tpu_estimator_spec if use_tpu
      else head.create_estimator_spec)
  # pylint: enable=protected-access

  def _noop_train_op(unused_loss):
    # The spec is built with a train op that does nothing.
    return tf.no_op()

  return spec_factory(
      features=features,
      labels=labels,
      mode=mode,
      logits=logits,
      train_op_fn=_noop_train_op)
def _best_eval_metrics_fn(*args):
  """Returns the best eval metrics."""
  with tf_compat.v1.variable_scope("best_eval_metrics"):
    remaining = list(args)
    # The last positional argument carries the best candidate index.
    idx, idx_update_op = tf_compat.v1.metrics.mean(remaining.pop())
    idx = tf.cast(idx, tf.int32)

    candidate_store = self._candidates_eval_metrics_store
    candidate_grouped_metrics = self._group_metric_ops(
        candidate_store.metric_fns,
        candidate_store.pack_args(remaining[:len(candidate_args)]))

    subnetwork_store = self._subnetworks_eval_metrics_store
    subnetwork_start = len(remaining) - len(subnetwork_args)
    subnetwork_grouped_metrics = self._group_metric_ops(
        subnetwork_store.metric_fns,
        subnetwork_store.pack_args(remaining[subnetwork_start:]))

    eval_metric_ops = {}
    for metric_name in sorted(candidate_grouped_metrics):
      metric_ops = candidate_grouped_metrics[metric_name]
      # Skip metrics not reported by every candidate, and the "loss" key.
      if len(metric_ops) != len(self._candidates) or metric_name == "loss":
        continue

      values, ops = list(six.moves.zip(*metric_ops))
      best_value = tf.stack(values)[idx]

      # All tensors in this function have been outfed from the TPU, so we
      # must update them manually, otherwise the TPU will hang indefinitely
      # for the value of idx to update.
      update_ops = list(ops)
      update_ops.append(idx_update_op)

      # Bundle subnetwork eval metric ops and ensemble "loss" ops (which
      # is a restricted Estimator keyword) into other metric ops so that
      # they are computed.
      ensemble_loss_ops = candidate_grouped_metrics.get("loss", tf.no_op())
      all_ops = tf.group(update_ops, ensemble_loss_ops,
                         subnetwork_grouped_metrics)
      eval_metric_ops[metric_name] = (best_value, all_ops)

    iteration_number = tf.constant(self._iteration_number)
    eval_metric_ops["iteration"] = (iteration_number, iteration_number)

    if self._replay_indices_for_all:
      _replay_eval_metrics(idx, eval_metric_ops)

    # tf.estimator.Estimator does not allow a "loss" key to be present in
    # its eval_metrics.
    assert "loss" not in eval_metric_ops
    return eval_metric_ops
def build_train_op(self, ensemble, loss, var_list, labels, iteration_step,
                   summary, previous_ensemble):
  """Builds the op minimizing the loss plus complexity regularization.

  Args:
    ensemble: Object exposing `complexity_regularization`, added to `loss`.
    loss: Loss to minimize.
    var_list: Variables handed to the optimizer.
    labels: Unused.
    iteration_step: Unused.
    summary: Unused.
    previous_ensemble: Unused.

  Returns:
    The optimizer's `minimize` result, or `tf.no_op()` when there is no
    optimizer.
  """
  del labels, iteration_step, summary, previous_ensemble  # unused

  # `self._optimizer` may be an optimizer or a zero-argument factory for one.
  opt = self._optimizer
  opt = opt() if callable(opt) else opt

  if opt is not None:
    # The AdaNet Estimator is responsible for incrementing the global step.
    regularized_loss = loss + ensemble.complexity_regularization
    return opt.minimize(loss=regularized_loss, var_list=var_list)
  return tf.no_op()
def update_partial(self, policy, tau=1.0):
  """Updates this policy's variables from another policy's variables.

  Only the leading `len(policy.variables())` variables of this policy are
  passed as update targets.

  Args:
    policy: Another policy it can update from.
    tau: A float scalar in [0, 1]. When tau is 1.0 (the default), we do a hard
      update. This is used for trainable variables.

  Returns:
    A TF op to do the update, or a no-op when this policy has no variables.
  """
  if not self.variables():
    return tf.no_op()

  source_vars = policy.variables()
  target_vars = self.variables()[:len(source_vars)]
  return common.soft_variables_update(
      source_vars,
      target_vars,
      tau=tau,
      tau_non_trainable=None,
      sort_variables_by_name=True)
def _mixture_weights_train_op_fn(loss, var_list):
  """Checks the build context, then returns a gradient-descent train op."""
  self.assertLen(var_list, want_ensemble_trainable_vars)
  trainable_vars = tf_compat.v1.get_collection(
      tf_compat.v1.GraphKeys.TRAINABLE_VARIABLES)
  self.assertEqual(var_list, trainable_vars)

  # Subnetworks get iteration steps instead of global steps.
  step_op_name = tf_compat.v1.train.get_global_step().op.name
  self.assertEqual("ensemble_test/iteration_step", step_op_name)

  # Subnetworks get scoped summaries.
  self.assertEqual("fake_scalar", tf_compat.v1.summary.scalar("scalar", 1.))
  self.assertEqual("fake_image", tf_compat.v1.summary.image("image", 1.))
  self.assertEqual("fake_histogram",
                   tf_compat.v1.summary.histogram("histogram", 1.))
  self.assertEqual("fake_audio",
                   tf_compat.v1.summary.audio("audio", 1., 1.))

  if var_list:
    sgd = tf_compat.v1.train.GradientDescentOptimizer(learning_rate=.1)
    return sgd.minimize(loss, var_list=var_list)
  # Nothing to train.
  return tf.no_op()
def _assert_non_singular(self):
  """Returns a `tf.no_op` named 'assert_non_singular'; nothing is checked."""
  return tf.no_op(name='assert_non_singular')
def _if_should_apply_grads(grads):
  """Returns (loss scale update op, whether the gradients should be applied)."""
  if not isinstance(self._loss_scale, _DynamicLossScaleState):
    # No dynamic loss scale: nothing to update, always apply.
    return (tf.no_op(), True)
  return self._loss_scale.update(grads)
def test_build_train_op_no_op(self):
  """A default ComplexityRegularizedEnsembler builds a no-op train op."""
  with context.graph_mode():
    unused_args = [None] * 7  # arguments unused
    ensembler = ensemble.ComplexityRegularizedEnsembler()
    train_op = ensembler.build_train_op(*unused_args)
    self.assertEqual(train_op.type, tf.no_op().type)
def train_step(self, initial_env_step: dataset_lib.EnvStep,
               experience: dataset_lib.EnvStep,
               target_policy: tf_policy.TFPolicy):
  """Performs a single training step based on batch.

  Args:
    initial_env_step: A batch of initial steps.
    experience: A batch of transitions. Elements must have shape [batch_size,
      2, ...].
    target_policy: The policy whose value we want to estimate.

  Returns:
    A pair `(losses, train_op)`. `losses` is the tuple
    `(estimate, mean weighted nu loss, mean weighted zeta loss, mean weight
    loss, alpha_loss, divergence)` and `train_op` groups the nu, zeta, weight
    and alpha update ops.
  """
  # Split each transition into its current and next step.
  env_step = tf.nest.map_structure(lambda t: t[:, 0, ...], experience)
  next_env_step = tf.nest.map_structure(lambda t: t[:, 1, ...], experience)

  # The tape is persistent because several gradients are taken from it below.
  with tf.GradientTape(
      watch_accessed_variables=False, persistent=True) as tape:
    tape.watch(self._nu_network.variables)
    tape.watch(self._zeta_network.variables)
    tape.watch(self._weight_network.variables)
    tape.watch([self._alpha])
    nu_loss, zeta_loss = self.train_loss(initial_env_step, env_step,
                                         next_env_step, target_policy)

    nu_reg = self._nu_regularizer * self._orthogonal_regularization(
        self._nu_network)
    zeta_reg = self._zeta_regularizer * self._orthogonal_regularization(
        self._zeta_network)

    # Binary search to find best alpha.
    left = self._alpha - 1 * tf.ones_like(self._two_sided_limit)
    right = self._alpha + 1 * tf.ones_like(self._two_sided_limit)
    for _ in range(4):
      mid = 0.5 * (left + right)
      weights, log_weights = self._get_weights(
          initial_env_step, env_step, next_env_step, nu_loss, alpha=mid)

      divergence = self._compute_divergence(weights, log_weights)
      divergence_violation = divergence - self._two_sided_limit
      left = tf.where(divergence_violation > 0., mid, left)
      right = tf.where(divergence_violation > 0., right, mid)
    best_alpha = 0.5 * (left + right)
    # Move alpha only part of the way towards the searched value.
    self._alpha.assign(0.05 * best_alpha + 0.95 * self._alpha)

    weights, log_weights = self._get_weights(initial_env_step, env_step,
                                             next_env_step, nu_loss)
    divergence = self._compute_divergence(weights, log_weights)
    divergence_violation = divergence - self._two_sided_limit

    weighted_nu_loss = tf.reshape(
        nu_loss, [-1, self._num_limits, 2]) * weights[:, :, None]
    weighted_zeta_loss = tf.reshape(
        zeta_loss, [-1, self._num_limits, 2]) * weights[:, :, None]

    # Multiplier to make all weight optimizations minimizations.
    # Takes into account that sign of algae_alpha determines whether nu_loss
    # has switched signs (since all of nu_loss is used for minimization).
    weight_loss_multiplier = self._algae_alpha_sign * tf.concat(
        2 * [tf.ones_like(self._divergence_limit)] +
        2 * [-tf.ones_like(self._divergence_limit)],
        axis=-1)
    weight_loss = tf.reduce_mean(
        tf.reshape(weight_loss_multiplier * nu_loss,
                   [-1, self._num_limits, 2]), 1)
    weight_loss += tf.exp(self._alpha) * divergence_violation

    reg_weighted_nu_loss = (weighted_nu_loss + nu_reg)
    # The zeta loss takes the zeta regularizer. Adding `nu_reg` here leaves
    # `zeta_reg` unused and gives the zeta network no regularization
    # gradient, since `nu_reg` is built from the nu network only.
    reg_weighted_zeta_loss = (weighted_zeta_loss + zeta_reg)

    alpha_loss = (-tf.exp(self._alpha) *
                  tf.stop_gradient(divergence_violation))

  nu_grads = tape.gradient(reg_weighted_nu_loss, self._nu_network.variables)
  nu_grad_op = self._nu_optimizer.apply_gradients(
      zip(nu_grads, self._nu_network.variables))

  zeta_grads = tape.gradient(reg_weighted_zeta_loss,
                             self._zeta_network.variables)
  zeta_grad_op = self._zeta_optimizer.apply_gradients(
      zip(zeta_grads, self._zeta_network.variables))

  if not self._closed_form_weights:
    weight_grads = tape.gradient(weight_loss, self._weight_network.variables)
    weight_grad_op = self._weight_optimizer.apply_gradients(
        zip(weight_grads, self._weight_network.variables))
  else:
    weight_grad_op = tf.group()

  # The alpha gradients are computed but not applied: the optimizer step is
  # disabled below and alpha is set by the binary search above.
  alpha_grads = tape.gradient(alpha_loss, [self._alpha])
  #alpha_grad_op = self._alpha_optimizer.apply_gradients(
  #    zip(alpha_grads, [self._alpha]))
  alpha_grad_op = tf.no_op()

  for idx in range(self._num_limits):
    tf.summary.scalar('divergence%d' % idx, divergence[idx])
    tf.summary.scalar('nu_loss%d' % idx, tf.reduce_mean(nu_loss, 0)[idx])
    tf.summary.scalar('zeta_loss%d' % idx, tf.reduce_mean(zeta_loss, 0)[idx])
    tf.summary.scalar('exp_alpha%d' % idx, tf.exp(self._alpha[idx]))
    tf.summary.histogram('weights%d' % idx, weights[:, idx])

  estimate = tf.reduce_mean(
      weighted_nu_loss *
      tf.reshape(self._algae_alpha_sign, [self._num_limits, 2]),
      axis=[0, -1])
  return ((estimate, tf.reshape(tf.reduce_mean(weighted_nu_loss, [0]), [-1]),
           tf.reshape(tf.reduce_mean(weighted_zeta_loss, [0]), [-1]),
           tf.reduce_mean(weight_loss, 0), alpha_loss, divergence),
          tf.group(nu_grad_op, zeta_grad_op, weight_grad_op, alpha_grad_op))
def _make_metrics(self,
                  metric_fn,
                  mode=tf.estimator.ModeKeys.EVAL,
                  multi_head=False,
                  sess=None):
  """Builds subnetwork and ensemble specs and evaluates their eval metrics.

  Args:
    metric_fn: Metric function handed to the ensemble builder and the
      subnetwork manager.
    mode: Estimator mode used to build both specs.
    multi_head: Whether to use a two-headed `MultiHead` instead of a single
      `BinaryClassHead`.
    sess: Optional session; when given, `sess.run` replaces `self.evaluate`.

  Returns:
    A pair of dicts (subnetwork metrics, ensemble metrics) mapping each
    metric name to its evaluated value tensor.
  """
  with context.graph_mode():
    if not multi_head:
      head = binary_class_head.BinaryClassHead(loss_reduction=tf_compat.SUM)
      labels = tf.constant([0, 1])
    else:
      head_names = ("head1", "head2")
      head = multi_head_lib.MultiHead(heads=[
          binary_class_head.BinaryClassHead(
              name=head_name, loss_reduction=tf_compat.SUM)
          for head_name in head_names
      ])
      labels = {head_name: tf.constant([0, 1]) for head_name in head_names}
    features = {"x": tf.constant([[1.], [2.]])}

    builder = _EnsembleBuilder(head, metric_fn=metric_fn)
    subnetwork_manager = _SubnetworkManager(head, metric_fn=metric_fn)

    def _noop_train_op_fn(unused0, unused1):
      return tf.no_op()

    subnetwork_builder = _Builder(
        _noop_train_op_fn, _noop_train_op_fn, use_logits_last_layer=True)

    subnetwork_spec = subnetwork_manager.build_subnetwork_spec(
        name="test",
        subnetwork_builder=subnetwork_builder,
        summary=_FakeSummary(),
        features=features,
        mode=mode,
        labels=labels)
    ensemble_spec = builder.build_ensemble_spec(
        name="test",
        candidate=EnsembleCandidate("foo", [subnetwork_builder], None),
        ensembler=ComplexityRegularizedEnsembler(
            mixture_weight_type=MixtureWeightType.SCALAR),
        subnetwork_specs=[subnetwork_spec],
        summary=_FakeSummary(),
        features=features,
        iteration_number=0,
        labels=labels,
        mode=mode,
        my_ensemble_index=0,
        previous_ensemble_spec=None,
        previous_iteration_checkpoint=None)

    subnetwork_metric_ops = subnetwork_spec.eval_metrics.eval_metrics_ops()
    ensemble_metric_ops = ensemble_spec.eval_metrics.eval_metrics_ops()

    evaluate = sess.run if sess is not None else self.evaluate
    evaluate((tf_compat.v1.global_variables_initializer(),
              tf_compat.v1.local_variables_initializer()))
    # Run every (tensor, op) pair once so the metric variables are updated.
    evaluate((subnetwork_metric_ops, ensemble_metric_ops))

    # Return the idempotent tensor part of the (tensor, op) metrics tuple.
    subnetwork_values = {
        key: evaluate(subnetwork_metric_ops[key][0])
        for key in subnetwork_metric_ops
    }
    ensemble_values = {
        key: evaluate(ensemble_metric_ops[key][0])
        for key in ensemble_metric_ops
    }
    return subnetwork_values, ensemble_values
class IterationBuilderTest(tu.AdanetTestCase): @parameterized.named_parameters( { "testcase_name": "negative_max_steps", "max_steps": -1, }, { "testcase_name": "zero_max_steps", "max_steps": 0, }) @test_util.run_in_graph_and_eager_modes def test_init_errors(self, max_steps): with self.assertRaises(ValueError): _IterationBuilder( _FakeCandidateBuilder(), _FakeSubnetworkManager(), _FakeEnsembleBuilder(), summary_maker=_ScopedSummary, ensemblers=[_FakeEnsembler()], max_steps=max_steps) # pylint: disable=g-long-lambda @parameterized.named_parameters( { "testcase_name": "single_subnetwork_fn", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("training")], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 0, }, { "testcase_name": "single_subnetwork_fn_mock_summary", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("training")], "summary_maker": functools.partial(_TPUScopedSummary, logdir="/tmp/fakedir"), "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 0, }, { "testcase_name": "single_subnetwork_with_eval_metrics", "ensemble_builder": _FakeEnsembleBuilder(eval_metric_ops_fn=lambda: {"a": (tf.constant(1), tf.constant(2))}), "subnetwork_builders": [_FakeBuilder("training",),], "mode": tf.estimator.ModeKeys.EVAL, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_eval_metric_ops": ["a", "iteration"], "want_best_candidate_index": 0, }, { "testcase_name": "single_subnetwork_with_non_tensor_eval_metric_op", "ensemble_builder": _FakeEnsembleBuilder(eval_metric_ops_fn=lambda: {"a": (tf.constant(1), tf.no_op())}), "subnetwork_builders": [_FakeBuilder("training",),], "mode": tf.estimator.ModeKeys.EVAL, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 
1.403943, "want_predictions": 2.129, "want_eval_metric_ops": ["a", "iteration"], "want_best_candidate_index": 0, }, { "testcase_name": "single_subnetwork_done_training_fn", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("done")], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 0, }, { "testcase_name": "single_dict_predictions_subnetwork_fn", "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True), "subnetwork_builders": [_FakeBuilder("training")], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": { "classes": 2, "logits": 2.129 }, "want_best_candidate_index": 0, }, { "testcase_name": "previous_ensemble", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("training")], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "previous_iteration": lambda: _FakeIteration( tu.dummy_ensemble_spec("old", variables=[tf.Variable(1.)])), "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 1, }, { "testcase_name": "previous_ensemble_is_best", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("training")], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "previous_iteration": lambda: _FakeIteration( tu.dummy_ensemble_spec( "old", random_seed=12, variables=[tf.Variable(1.)])), "want_loss": -.437, "want_predictions": .688, "want_best_candidate_index": 0, }, { "testcase_name": "previous_ensemble_spec_and_eval_metrics", "ensemble_builder": _FakeEnsembleBuilder(eval_metric_ops_fn=lambda: {"a": (tf.constant(1), tf.constant(2))}), "subnetwork_builders": [_FakeBuilder("training")], "mode": tf.estimator.ModeKeys.EVAL, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "previous_iteration": lambda: _FakeIteration( tu.dummy_ensemble_spec( "old", eval_metrics=tu.create_ensemble_metrics( 
metric_fn=lambda: {"a": (tf.constant(1), tf.constant(2))}), variables=[tf.Variable(1.)])), "want_loss": 1.403943, "want_predictions": 2.129, "want_eval_metric_ops": ["a", "iteration"], "want_best_candidate_index": 1, }, { "testcase_name": "two_subnetwork_fns", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=7) ], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.40394, "want_predictions": 2.129, "want_best_candidate_index": 0, }, { "testcase_name": "two_subnetwork_fns_other_best", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=12) ], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": -.437, "want_predictions": .688, "want_best_candidate_index": 1, }, { "testcase_name": "two_subnetwork_one_training_fns", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("training"), _FakeBuilder("done", random_seed=7)], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 0, }, { "testcase_name": "two_subnetwork_done_training_fns", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("done"), _FakeBuilder("done1", random_seed=7)], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 0, }, { "testcase_name": "two_dict_predictions_subnetwork_fns", "ensemble_builder": _FakeEnsembleBuilder(dict_predictions=True), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=7) ], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.404, "want_predictions": { "classes": 2, "logits": 2.129 }, "want_best_candidate_index": 0, }, { "testcase_name": "two_dict_predictions_subnetwork_fns_predict_classes", 
"ensemble_builder": _FakeEnsembleBuilder( dict_predictions=True, export_output_key=tu.ExportOutputKeys.CLASSIFICATION_CLASSES), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=7) ], "mode": tf.estimator.ModeKeys.PREDICT, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.404, "want_predictions": { "classes": 2, "logits": 2.129 }, "want_best_candidate_index": 0, "want_export_outputs": { tu.ExportOutputKeys.CLASSIFICATION_CLASSES: [2.129], "serving_default": [2.129], }, }, { "testcase_name": "two_dict_predictions_subnetwork_fns_predict_scores", "ensemble_builder": _FakeEnsembleBuilder( dict_predictions=True, export_output_key=tu.ExportOutputKeys.CLASSIFICATION_SCORES), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=7) ], "mode": tf.estimator.ModeKeys.PREDICT, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.404, "want_predictions": { "classes": 2, "logits": 2.129 }, "want_best_candidate_index": 0, "want_export_outputs": { tu.ExportOutputKeys.CLASSIFICATION_SCORES: [2.129], "serving_default": [2.129], }, }, { "testcase_name": "two_dict_predictions_subnetwork_fns_predict_regression", "ensemble_builder": _FakeEnsembleBuilder( dict_predictions=True, export_output_key=tu.ExportOutputKeys.REGRESSION), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=7) ], "mode": tf.estimator.ModeKeys.PREDICT, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_predictions": { "classes": 2, "logits": 2.129 }, "want_best_candidate_index": 0, "want_export_outputs": { tu.ExportOutputKeys.REGRESSION: 2.129, "serving_default": 2.129, }, }, { "testcase_name": "two_dict_predictions_subnetwork_fns_predict_prediction", "ensemble_builder": _FakeEnsembleBuilder( dict_predictions=True, export_output_key=tu.ExportOutputKeys.PREDICTION), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", 
random_seed=7) ], "mode": tf.estimator.ModeKeys.PREDICT, "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_predictions": { "classes": 2, "logits": 2.129 }, "want_best_candidate_index": 0, "want_export_outputs": { tu.ExportOutputKeys.PREDICTION: { "classes": 2, "logits": 2.129 }, "serving_default": { "classes": 2, "logits": 2.129 }, }, }, { "testcase_name": "chief_session_run_hook", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [ _FakeBuilder("training", chief_hook=tu.ModifierSessionRunHook()) ], "features": lambda: [[1., -1., 0.]], "labels": lambda: [1], "want_loss": 1.403943, "want_predictions": 2.129, "want_best_candidate_index": 0, "want_chief_hooks": True, }) @test_util.run_in_graph_and_eager_modes def test_build_iteration(self, ensemble_builder, subnetwork_builders, features, labels, want_predictions, want_best_candidate_index, want_eval_metric_ops=(), previous_iteration=None, want_loss=None, want_export_outputs=None, mode=tf.estimator.ModeKeys.TRAIN, summary_maker=_ScopedSummary, want_chief_hooks=False): with context.graph_mode(): tf_compat.v1.train.create_global_step() builder = _IterationBuilder( _FakeCandidateBuilder(), _FakeSubnetworkManager(), ensemble_builder, summary_maker=summary_maker, ensemblers=[_FakeEnsembler()], max_steps=1) iteration = builder.build_iteration( base_global_step=0, iteration_number=0, ensemble_candidates=[ EnsembleCandidate(b.name, [b], None) for b in subnetwork_builders ], previous_iteration=previous_iteration() if previous_iteration else None, subnetwork_builders=subnetwork_builders, features=features(), labels=labels(), mode=mode, config=tf.estimator.RunConfig(model_dir=self.test_subdirectory)) init = tf.group(tf_compat.v1.global_variables_initializer(), tf_compat.v1.local_variables_initializer()) self.evaluate(init) estimator_spec = iteration.estimator_spec if want_chief_hooks: self.assertNotEmpty(iteration.estimator_spec.training_chief_hooks) self.assertAllClose( want_predictions, 
self.evaluate(estimator_spec.predictions), atol=1e-3) # A default architecture metric is always included, even if we don't # specify one. eval_metric_ops = estimator_spec.eval_metric_ops if "architecture/adanet/ensembles" in eval_metric_ops: del eval_metric_ops["architecture/adanet/ensembles"] self.assertEqual(set(want_eval_metric_ops), set(eval_metric_ops.keys())) self.assertEqual(want_best_candidate_index, self.evaluate(iteration.best_candidate_index)) if mode == tf.estimator.ModeKeys.PREDICT: self.assertIsNotNone(estimator_spec.export_outputs) self.assertAllClose( want_export_outputs, self.evaluate( _export_output_tensors(estimator_spec.export_outputs)), atol=1e-3) self.assertIsNone(iteration.estimator_spec.train_op) self.assertIsNone(iteration.estimator_spec.loss) self.assertIsNotNone(want_export_outputs) return self.assertAlmostEqual( want_loss, self.evaluate(iteration.estimator_spec.loss), places=3) self.assertIsNone(iteration.estimator_spec.export_outputs) if mode == tf.estimator.ModeKeys.TRAIN: self.evaluate(iteration.estimator_spec.train_op) @parameterized.named_parameters( { "testcase_name": "empty_subnetwork_builders", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [], "want_raises": ValueError, }, { "testcase_name": "same_subnetwork_builder_names", "ensemble_builder": _FakeEnsembleBuilder(), "subnetwork_builders": [_FakeBuilder("same_name"), _FakeBuilder("same_name")], "want_raises": ValueError, }, { "testcase_name": "same_ensembler_names", "ensemble_builder": _FakeEnsembleBuilder(), "multiple_candidates": True, "subnetwork_builders": [_FakeBuilder("fake_builder_name")], "want_raises": ValueError, }, { "testcase_name": "predict_invalid", "ensemble_builder": _FakeEnsembleBuilder( dict_predictions=True, export_output_key=tu.ExportOutputKeys.INVALID), "subnetwork_builders": [ _FakeBuilder("training"), _FakeBuilder("training2", random_seed=7) ], "mode": tf.estimator.ModeKeys.PREDICT, "want_raises": TypeError, }) 
@test_util.run_in_graph_and_eager_modes
def test_build_iteration_error(self,
                               ensemble_builder,
                               subnetwork_builders,
                               want_raises,
                               multiple_candidates=False,
                               mode=tf.estimator.ModeKeys.TRAIN,
                               summary_maker=_ScopedSummary):
    """Checks that `build_iteration` raises `want_raises` for bad inputs.

    Args:
      ensemble_builder: Ensemble builder handed to the `_IterationBuilder`.
      subnetwork_builders: Builders used both for the ensemble candidate(s)
        and as the `subnetwork_builders` argument of `build_iteration`.
      want_raises: Exception type `build_iteration` is expected to raise.
      multiple_candidates: When truthy, two identically named ("test")
        ensemble candidates are passed instead of one.
      mode: Estimator mode forwarded to `build_iteration`.
      summary_maker: Summary factory handed to the `_IterationBuilder`.
    """
    with context.graph_mode():
        tf_compat.v1.train.create_global_step()
        iteration_builder = _IterationBuilder(
            _FakeCandidateBuilder(),
            _FakeSubnetworkManager(),
            ensemble_builder,
            summary_maker=summary_maker,
            ensemblers=[_FakeEnsembler()],
            max_steps=100)
        # One candidate by default; a second one with the same name when the
        # test case asks for multiple candidates.
        candidate_count = 2 if multiple_candidates else 1
        candidates = [
            EnsembleCandidate("test", subnetwork_builders, None)
            for _ in range(candidate_count)
        ]
        with self.assertRaises(want_raises):
            iteration_builder.build_iteration(
                base_global_step=0,
                iteration_number=0,
                ensemble_candidates=candidates,
                subnetwork_builders=subnetwork_builders,
                features=[[1., -1., 0.]],
                labels=[1],
                mode=mode,
                config=tf.estimator.RunConfig(
                    model_dir=self.test_subdirectory))
def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                              iteration_step, summary, previous_ensemble):
    """Returns a no-op `TrainOpSpec` carrying the chief hook, if one is set.

    None of the arguments other than `self` are read.

    Returns:
      `None` when `self._chief_hook` is falsy; otherwise a `TrainOpSpec` whose
      train op is `tf.no_op()`, whose chief hooks are `[self._chief_hook]`
      and whose `hooks` is `None`.
    """
    chief_hook = self._chief_hook
    if not chief_hook:
        return None
    return TrainOpSpec(
        train_op=tf.no_op(), chief_hooks=[chief_hook], hooks=None)
class ReportMaterializerTest(parameterized.TestCase, tf.test.TestCase):
    """Tests for `ReportMaterializer.materialize_subnetwork_reports`."""

    # pylint: disable=g-long-lambda
    @parameterized.named_parameters(
        {
            "testcase_name": "one_empty_subnetwork",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo": subnetwork.Report(
                    hparams={}, attributes={}, metrics={}),
            },
            "steps": 3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "one_subnetwork",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo": subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.14),
                        "foo": tf.constant("bar"),
                        "parameters": tf.constant(7777),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
            },
            "steps": 3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.14,
                        "foo": "bar",
                        "parameters": 7777,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "one_subnetwork_iteration_2",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo": subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.14),
                        "foo": tf.constant("bar"),
                        "parameters": tf.constant(7777),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
            },
            "steps": 3,
            "iteration_number": 2,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=2,
                    name="foo",
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.14,
                        "foo": "bar",
                        "parameters": 7777,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "two_subnetworks",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo1": subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.14),
                        "foo": tf.constant("bar"),
                        "parameters": tf.constant(7777),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
                "foo2": subnetwork.Report(
                    hparams={
                        "learning_rate": 1.e-6,
                        "optimizer": "sgd",
                        "num_layers": 1,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": tf.constant(3.1445),
                        "foo": tf.constant("baz"),
                        "parameters": tf.constant(7788),
                        "boo": tf.constant(True),
                    },
                    metrics={},
                ),
            },
            "steps": 3,
            "included_subnetwork_names": ["foo2"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo1",
                    hparams={
                        "learning_rate": 1.e-5,
                        "optimizer": "sgd",
                        "num_layers": 0,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.14,
                        "foo": "bar",
                        "parameters": 7777,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=False,
                ),
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo2",
                    hparams={
                        "learning_rate": 1.e-6,
                        "optimizer": "sgd",
                        "num_layers": 1,
                        "use_side_inputs": True,
                    },
                    attributes={
                        "weight_norms": 3.1445,
                        "foo": "baz",
                        "parameters": 7788,
                        "boo": True,
                    },
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "two_subnetworks_zero_included",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo1": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
                "foo2": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
            },
            "steps": 3,
            "included_subnetwork_names": [],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo1",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=False,
                ),
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo2",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=False,
                ),
            ],
        }, {
            "testcase_name": "two_subnetworks_both_included",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo1": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
                "foo2": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={},
                ),
            },
            "steps": 3,
            "included_subnetwork_names": ["foo1", "foo2"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo1",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=True,
                ),
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo2",
                    hparams={},
                    attributes={},
                    metrics={},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "materialize_metrics",
            "input_fn": tu.dummy_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                          [[1.], [2.], [3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={"moo": tf_compat.v1.metrics.mean(labels)},
                ),
            },
            "steps": 3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={"moo": 2.},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "materialize_metrics_none_steps",
            "input_fn": tu.dataset_input_fn([[1., 1.], [1., 1.], [1., 1.]],
                                            [[1.], [2.], [3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={"moo": tf_compat.v1.metrics.mean(labels)},
                ),
            },
            "steps": None,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={"moo": 2.},
                    included_in_final_ensemble=True,
                ),
            ],
        }, {
            "testcase_name": "materialize_metrics_non_tensor_op",
            "input_fn": tu.dummy_input_fn([[1., 2]], [[3.]]),
            "subnetwork_reports_fn": lambda features, labels: {
                "foo": subnetwork.Report(
                    hparams={},
                    attributes={},
                    metrics={"moo": (tf.constant(42), tf.no_op())},
                ),
            },
            "steps": 3,
            "included_subnetwork_names": ["foo"],
            "want_materialized_reports": [
                subnetwork.MaterializedReport(
                    iteration_number=0,
                    name="foo",
                    hparams={},
                    attributes={},
                    metrics={"moo": 42},
                    included_in_final_ensemble=True,
                ),
            ],
        })
    @test_util.run_in_graph_and_eager_modes
    def test_materialize_subnetwork_reports(self,
                                            input_fn,
                                            subnetwork_reports_fn,
                                            steps,
                                            iteration_number=0,
                                            included_subnetwork_names=None,
                                            want_materialized_reports=None):
        """Materializes subnetwork reports and compares them field by field.

        Args:
          input_fn: Callable returning `(features, labels)`; also handed to
            the `ReportMaterializer`.
          subnetwork_reports_fn: Callable mapping `(features, labels)` to a
            dict of report name -> `subnetwork.Report`.
          steps: `steps` argument for the `ReportMaterializer`.
          iteration_number: Iteration number passed to the materializer and
            expected on every materialized report.
          included_subnetwork_names: Names passed to the materializer as the
            subnetworks included in the final ensemble.
          want_materialized_reports: Expected
            `subnetwork.MaterializedReport`s, matched to results by name.
        """
        with context.graph_mode():
            tf.constant(0.)  # dummy op so the session graph is never empty.
            features, labels = input_fn()
            subnetwork_reports = subnetwork_reports_fn(features, labels)
            # `test_session()` is deprecated; `cached_session()` is the
            # drop-in replacement.
            with self.cached_session() as sess:
                sess.run(tf_compat.v1.initializers.local_variables())
                report_materializer = ReportMaterializer(
                    input_fn=input_fn, steps=steps)
                materialized_reports = (
                    report_materializer.materialize_subnetwork_reports(
                        sess, iteration_number, subnetwork_reports,
                        included_subnetwork_names))
                self.assertEqual(
                    len(want_materialized_reports),
                    len(materialized_reports))
                materialized_reports_dict = {
                    blrm.name: blrm for blrm in materialized_reports
                }
                for want_materialized_report in want_materialized_reports:
                    materialized_report = (
                        materialized_reports_dict[
                            want_materialized_report.name])
                    self.assertEqual(iteration_number,
                                     materialized_report.iteration_number)
                    # Every expected report states whether it made the final
                    # ensemble, so verify that flag instead of ignoring it.
                    self.assertEqual(
                        want_materialized_report.included_in_final_ensemble,
                        materialized_report.included_in_final_ensemble)

                    # Hyperparameters are compared as-is (no decoding).
                    self.assertEqual(
                        set(want_materialized_report.hparams.keys()),
                        set(materialized_report.hparams.keys()))
                    for hparam_key, want_hparam in (
                            want_materialized_report.hparams.items()):
                        if isinstance(want_hparam, float):
                            self.assertAllClose(
                                want_hparam,
                                materialized_report.hparams[hparam_key])
                        else:
                            self.assertEqual(
                                want_hparam,
                                materialized_report.hparams[hparam_key])

                    # Attributes and metrics go through `decode` before
                    # comparison; floats are compared approximately.
                    self.assertSetEqual(
                        set(want_materialized_report.attributes.keys()),
                        set(materialized_report.attributes.keys()))
                    for attribute_key, want_attribute in (
                            want_materialized_report.attributes.items()):
                        if isinstance(want_attribute, float):
                            self.assertAllClose(
                                want_attribute,
                                decode(materialized_report.attributes[
                                    attribute_key]))
                        else:
                            self.assertEqual(
                                want_attribute,
                                decode(materialized_report.attributes[
                                    attribute_key]))

                    self.assertSetEqual(
                        set(want_materialized_report.metrics.keys()),
                        set(materialized_report.metrics.keys()))
                    for metric_key, want_metric in (
                            want_materialized_report.metrics.items()):
                        if isinstance(want_metric, float):
                            self.assertAllClose(
                                want_metric,
                                decode(materialized_report.metrics[
                                    metric_key]))
                        else:
                            self.assertEqual(
                                want_metric,
                                decode(materialized_report.metrics[
                                    metric_key]))
def eval_op_fn(loss):
    """Returns a no-op paired with an empty list, ignoring `loss`.

    Args:
      loss: Unused.

    Returns:
      A `(tf.no_op(), [])` tuple.
    """
    del loss  # Unused.
    noop = tf.no_op()
    return noop, []
def build_subnetwork(self, features, labels, logits_dimension, training, iteration_step, summary, previous_ensemble, config=None):
    """Builds a `Subnetwork` by calling the subestimator's model function.

    The model function is invoked through a `make_template` wrapper. When
    training and the subestimator provides its own `train_input_fn`, the
    template is called twice: first on the subestimator's own inputs (whose
    train op is run from a `_SecondaryTrainOpRunnerHook`), then on the shared
    `features`/`labels` to produce the logits and last layer that are
    returned. Otherwise the template is called once on `features`/`labels`.

    Args:
      features: Input features shared with the ensemble graph.
      labels: Labels shared with the ensemble graph.
      logits_dimension: Not read by this method.
      training: Truthy to build in TRAIN mode; otherwise PREDICT mode is used.
      iteration_step: Not read by this method.
      summary: Forwarded to the model-function template.
      previous_ensemble: Not read by this method.
      config: Forwarded to `self._subestimator`.

    Returns:
      A `subnetwork_lib.Subnetwork` with the logits, last layer, a `shared`
      dict holding the train op spec under "train_op", a constant-zero
      complexity and the collected local init ops.
    """
    # We don't need an EVAL mode since AdaNet takes care of evaluation for us.
    mode = tf.estimator.ModeKeys.PREDICT
    if training:
        mode = tf.estimator.ModeKeys.TRAIN
    # Call in template to ensure that variables are created once and reused.
    call_model_fn_template = tf.compat.v1.make_template( "model_fn", self._call_model_fn)
    subestimator_features, subestimator_labels = features, labels
    local_init_ops = []
    subestimator = self._subestimator(config)
    if training and subestimator.train_input_fn:
        # TODO: Consider tensorflow_estimator/python/estimator/util.py.
        inputs = subestimator.train_input_fn()
        # Dataset inputs are turned into tensors via a one-shot iterator;
        # anything else is unpacked directly as (features, labels).
        if isinstance(inputs, (tf_compat.DatasetV1, tf_compat.DatasetV2)):
            subestimator_features, subestimator_labels = ( tf_compat.make_one_shot_iterator(inputs).get_next())
        else:
            subestimator_features, subestimator_labels = inputs
        # Construct subnetwork graph first because of dependencies on scope.
        _, _, bagging_train_op_spec, sub_local_init_op = call_model_fn_template( subestimator, subestimator_features, subestimator_labels, mode, summary)
        # Graph for ensemble learning gets model_fn_1 for scope.
        logits, last_layer, _, ensemble_local_init_op = call_model_fn_template( subestimator, features, labels, mode, summary)
        if sub_local_init_op:
            local_init_ops.append(sub_local_init_op)
        if ensemble_local_init_op:
            local_init_ops.append(ensemble_local_init_op)
        # Run train op in a hook so that exceptions can be intercepted by the
        # AdaNet framework instead of the Estimator's monitored training session.
        hooks = bagging_train_op_spec.hooks + (_SecondaryTrainOpRunnerHook( bagging_train_op_spec.train_op), )
        # The real train op runs inside the hook above, so the spec itself
        # only carries a no-op.
        train_op_spec = subnetwork_lib.TrainOpSpec( train_op=tf.no_op(), chief_hooks=bagging_train_op_spec.chief_hooks, hooks=hooks)
    else:
        logits, last_layer, train_op_spec, local_init_op = call_model_fn_template( subestimator, features, labels, mode, summary)
        if local_init_op:
            local_init_ops.append(local_init_op)
    # TODO: Replace with variance complexity measure.
    complexity = tf.constant(0.)
    return subnetwork_lib.Subnetwork(logits=logits, last_layer=last_layer, shared={"train_op": train_op_spec}, complexity=complexity, local_init_ops=local_init_ops)
def initialize(self):
    """Returns the initialization op for the current execution mode.

    Returns:
      `self._initializers` when not executing eagerly; `tf.no_op()` when
      executing eagerly.
    """
    if not tf.executing_eagerly():
        return self._initializers
    return tf.no_op()
def model_fn(features, labels, mode, params, config):
    """Build the model function for use in an estimator.

    Builds an encoder, decoder and topic prior, forms the ELBO as
    `reconstruction - kl`, and minimizes its negative mean with Adam. The
    prior's variables are excluded from the update while the global step is
    below `params["prior_burn_in_steps"]`.

    Args:
      features: The input features for the estimator. `features.shape[1]` is
        passed to `make_decoder`, and the sum over axis 1 is used as the
        per-document word count (presumably a documents-by-vocabulary count
        matrix; confirm against the input pipeline).
      labels: The labels, unused here.
      mode: Signifies whether it is train or test or predict. It is only
        forwarded to the returned `EstimatorSpec`; the train op is built
        regardless of its value.
      params: Some hyperparameters as a dictionary. Keys read here:
        "activation", "num_topics", "layer_sizes", "prior_initial_value",
        "learning_rate", "prior_burn_in_steps" and "vocabulary".
      config: The RunConfig, unused here.

    Returns:
      EstimatorSpec: A tf.estimator.EstimatorSpec instance with the loss, the
        train op, and eval metrics "elbo", "reconstruction", "kl",
        "perplexity" and "topics".
    """
    del labels, config
    encoder = make_encoder(params["activation"], params["num_topics"], params["layer_sizes"])
    decoder, topics_words = make_decoder(params["num_topics"], features.shape[1])
    topics_prior = make_prior(params["num_topics"], params["prior_initial_value"])
    alpha = topics_prior.concentration
    topics_posterior = encoder(features)
    topics = topics_posterior.sample(seed=234)
    random_reconstruction = decoder(topics)
    reconstruction = random_reconstruction.log_prob(features)
    tf1.summary.scalar("reconstruction", tf.reduce_mean(reconstruction))
    # Compute the KL-divergence between two Dirichlets analytically.
    # The sampled KL does not work well for "sparse" distributions
    # (see Appendix D of [2]).
    kl = tfd.kl_divergence(topics_posterior, topics_prior)
    tf1.summary.scalar("kl", tf.reduce_mean(kl))
    # Ensure that the KL is non-negative (up to a very small slack).
    # Negative KL can happen due to numerical instability.
    with tf.control_dependencies( [tf.debugging.assert_greater(kl, -1e-3, message="kl")]):
        kl = tf.identity(kl)
    elbo = reconstruction - kl
    avg_elbo = tf.reduce_mean(elbo)
    tf1.summary.scalar("elbo", avg_elbo)
    loss = -avg_elbo
    # Perform variational inference by minimizing the -ELBO.
    global_step = tf1.train.get_or_create_global_step()
    optimizer = tf1.train.AdamOptimizer(params["learning_rate"])
    # This implements the "burn-in" for prior parameters (see Appendix D of [2]).
    # For the first prior_burn_in_steps steps they are fixed, and then trained
    # jointly with the other parameters.
    grads_and_vars = optimizer.compute_gradients(loss)
    grads_and_vars_except_prior = [ x for x in grads_and_vars if x[1] not in topics_prior.variables]

    def train_op_except_prior():
        return optimizer.apply_gradients( grads_and_vars_except_prior, global_step=global_step)

    def train_op_all():
        return optimizer.apply_gradients( grads_and_vars, global_step=global_step)

    # Pick the update per step: prior held fixed during burn-in, everything
    # trained afterwards.
    train_op = tf.cond( pred=global_step < params["prior_burn_in_steps"], true_fn=train_op_except_prior, false_fn=train_op_all)
    # The perplexity is an exponent of the average negative ELBO per word.
    words_per_document = tf.reduce_sum(features, axis=1)
    log_perplexity = -elbo / words_per_document
    tf1.summary.scalar("perplexity", tf.exp(tf.reduce_mean(log_perplexity)))
    (log_perplexity_tensor, log_perplexity_update) = tf1.metrics.mean(log_perplexity)
    perplexity_tensor = tf.exp(log_perplexity_tensor)
    # Obtain the topics summary. Implemented as a py_func for simplicity.
    # NOTE: this rebinds `topics` (previously the sampled topic tensor) to
    # the string tensor produced by `get_topics_strings`.
    topics = tf1.py_func( functools.partial(get_topics_strings, vocabulary=params["vocabulary"]), [topics_words, alpha], tf.string, stateful=False)
    tf1.summary.text("topics", topics)
    # "perplexity" reuses the running-mean update op of the log-perplexity;
    # "topics" has nothing to accumulate, so its update op is a no-op.
    return tf1.estimator.EstimatorSpec( mode=mode, loss=loss, train_op=train_op, eval_metric_ops={ "elbo": tf1.metrics.mean(elbo), "reconstruction": tf1.metrics.mean(reconstruction), "kl": tf1.metrics.mean(kl), "perplexity": (perplexity_tensor, log_perplexity_update), "topics": (topics, tf.no_op()), }, )
def build_subnetwork_train_op(self, subnetwork, loss, var_list, labels,
                              iteration_step, summary, previous_ensemble):
    """Returns a no-op as the subnetwork's train op.

    None of the arguments are read.

    Returns:
      The op created by `tf.no_op()`.
    """
    train_op = tf.no_op()
    return train_op
class ReportTest(parameterized.TestCase, tf.test.TestCase):
    """Tests construction and validation of `Report`."""

    # pylint: disable=g-long-lambda
    @parameterized.named_parameters(
        {
            "testcase_name": "empty",
            "hparams": {},
            "attributes": lambda: {},
            "metrics": lambda: {},
        }, {
            "testcase_name": "non_empty",
            "hparams": {
                "hoo": 1
            },
            "attributes": lambda: {
                "aoo": tf.constant(1)
            },
            "metrics": lambda: {
                "moo": (tf.constant(1), tf.constant(1))
            },
        }, {
            "testcase_name": "non_tensor_update_op",
            "hparams": {
                "hoo": 1
            },
            "attributes": lambda: {
                "aoo": tf.constant(1)
            },
            "metrics": lambda: {
                "moo": (tf.constant(1), tf.no_op())
            },
        })
    # pylint: enable=g-long-lambda
    @test_util.run_in_graph_and_eager_modes
    def test_new(self, hparams, attributes, metrics):
        """Checks that a valid `Report` exposes the values it was built from.

        Args:
          hparams: Dict of hyperparameters passed to `Report`.
          attributes: Zero-argument callable returning the attributes dict.
          metrics: Zero-argument callable returning the metrics dict.
        """
        with context.graph_mode():
            _ = tf.constant(0)  # Just to have a non-empty graph.
            report = Report(
                hparams=hparams, attributes=attributes(), metrics=metrics())
            self.assertEqual(hparams, report.hparams)
            # The callables are invoked again to build fresh tensors; the
            # comparison is on evaluated values, not on tensor identity.
            self.assertEqual(
                self.evaluate(attributes()),
                self.evaluate(report.attributes))
            self.assertEqual(
                self.evaluate(metrics()), self.evaluate(report.metrics))

    @test_util.run_in_graph_and_eager_modes
    def test_drop_non_scalar_metric(self):
        """Tests b/118632346: the non-scalar metric "moo2" is dropped."""
        hparams = {"hoo": 1}
        attributes = {"aoo": tf.constant(1)}
        metrics = {
            "moo1": (tf.constant(1), tf.constant(1)),
            "moo2": (tf.constant([1, 1]), tf.constant([1, 1])),
        }
        want_metrics = metrics.copy()
        del want_metrics["moo2"]
        # `test_session()` is deprecated; `cached_session()` is the drop-in
        # replacement.
        with self.cached_session():
            report = Report(
                hparams=hparams, attributes=attributes, metrics=metrics)
            self.assertEqual(hparams, report.hparams)
            self.assertEqual(attributes, report.attributes)
            self.assertEqual(want_metrics, report.metrics)

    @parameterized.named_parameters(
        {
            "testcase_name": "tensor_hparams",
            "hparams": {
                "hoo": tf.constant(1)
            },
            "attributes": {},
            "metrics": {},
        }, {
            "testcase_name": "non_tensor_attributes",
            "hparams": {},
            "attributes": {
                "aoo": 1,
            },
            "metrics": {},
        }, {
            "testcase_name": "non_tuple_metrics",
            "hparams": {},
            "attributes": {},
            "metrics": {
                "moo": tf.constant(1)
            },
        }, {
            "testcase_name": "one_item_tuple_metrics",
            "hparams": {},
            "attributes": {},
            "metrics": {
                "moo": (tf.constant(1),)
            },
        })
    @test_util.run_in_graph_and_eager_modes
    def test_new_errors(self, hparams, attributes, metrics):
        """Checks that invalid `Report` arguments raise `ValueError`."""
        with self.assertRaises(ValueError):
            Report(hparams=hparams, attributes=attributes, metrics=metrics)