def _host_call_fn(gs, loss, lr):
    """Emit scalar training summaries from the CPU host.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`; whatever it needs must be
    supplied through `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    The parameters must match, in order, the list of `Tensor` objects given
    as the second element of the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      loss: `Tensor` with shape `[batch]` for the training loss.
      lr: `Tensor` with shape `[batch]` for the learning_rate.

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = gs[0]
    # The host call runs FLAGS.iterations_per_loop times once a TPU loop has
    # finished, so sizing the writer queue to that count lets the writer
    # flush to storage only once per loop.
    writer = summary.create_file_writer(
        FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop)
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar("loss", loss[0], step=step)
        summary.scalar("learning_rate", lr[0], step=step)
        return summary.all_summary_ops()
def train(model, optimizer, dataset, step_counter, log_interval=None):
    """Run one pass over `dataset`, updating `model` with `optimizer`.

    Args:
      model: Callable invoked as `model(images, training=True)`; its
        `variables` attribute lists what gets optimized.
      optimizer: Object whose `apply_gradients` receives the
        (gradient, variable) pairs together with `global_step`.
      dataset: Iterable yielding `(images, labels)` pairs.
      step_counter: Global step handed to the summary recorder and to
        `apply_gradients`.
      log_interval: When truthy, progress is printed every `log_interval`
        batches.
    """
    from tensorflow.contrib import summary as contrib_summary  # pylint: disable=g-import-not-at-top

    tick = time.time()
    for batch_idx, (images, labels) in enumerate(dataset):
        with contrib_summary.record_summaries_every_n_global_steps(
                10, global_step=step_counter):
            # The forward pass runs under the tape so the loss can be
            # differentiated with respect to the model variables afterwards.
            with tf.GradientTape() as tape:
                logits = model(images, training=True)
                loss_value = loss(logits, labels)
                contrib_summary.scalar('loss', loss_value)
                contrib_summary.scalar(
                    'accuracy', compute_accuracy(logits, labels))
            gradients = tape.gradient(loss_value, model.variables)
            grads_and_vars = list(zip(gradients, model.variables))
            optimizer.apply_gradients(
                grads_and_vars, global_step=step_counter)
            if log_interval and batch_idx % log_interval == 0:
                elapsed = time.time() - tick
                rate = log_interval / elapsed
                print('Step #%d\tLoss: %.6f (%d steps/sec)' %
                      (batch_idx, loss_value, rate))
                tick = time.time()
def _update_critic_ddpg(self, obs, action, next_obs, reward, mask):
    """Perform one DDPG critic update from a sampled batch.

    Args:
      obs: A tfe.Variable with a batch of observations.
      action: A tfe.Variable with a batch of actions.
      next_obs: A tfe.Variable with a batch of next observations.
      reward: A tfe.Variable with a batch of rewards.
      mask: A tfe.Variable with a batch of masks.
    """
    next_action = self.actor_target(next_obs)
    if self.use_absorbing_state:
        # Only non-actions can be executed once the goal state is reached,
        # so the target action is zeroed wherever the mask is not positive.
        a_mask = tf.maximum(0, mask)
        q_next = self.critic_target(next_obs, next_action * a_mask)
        q_target = reward + self.discount * q_next
    else:
        # With no absorbing state the bootstrap term is scaled by the mask.
        q_next = self.critic_target(next_obs, next_action)
        q_target = reward + self.discount * mask * q_next

    with tf.GradientTape() as tape:
        q_pred = self.critic(obs, action)
        critic_loss = tf.losses.mean_squared_error(q_target, q_pred)

    critic_vars = self.critic.variables
    grads = tape.gradient(critic_loss, critic_vars)
    self.critic_optimizer.apply_gradients(
        zip(grads, self.critic.variables), global_step=self.critic_step)

    every_100 = contrib_summary.record_summaries_every_n_global_steps(
        100, self.critic_step)
    with every_100:
        contrib_summary.scalar(
            'critic/loss', critic_loss, step=self.critic_step)
def _compute_gradients(self, actions, discounted_rewards, weights=None,
                       sequence_length=None, loss_str='train',
                       use_entropy_regularization=True, **kwargs):
    """Compute policy-gradient gradients for the trainable variables.

    Args:
      actions: Forwarded to `_compute_policy_loss`.
      discounted_rewards: Forwarded to both loss helpers.
      weights: Optional weights forwarded to `_compute_policy_loss`.
      sequence_length: When given, converted to a float32 sequence mask that
        is passed to both loss helpers as `seq_mask`.
      loss_str: Prefix of the `<loss_str>_loss` summary tag.
      use_entropy_regularization: Forwarded to `_compute_policy_loss`.
      **kwargs: Extra keyword arguments forwarded to both loss helpers.

    Returns:
      Gradients of the mean combined loss with respect to
      `self.trainable_variables`.
    """
    seq_mask = None
    if sequence_length is not None:
        seq_mask = tf.sequence_mask(sequence_length, dtype=tf.float32)

    variables = self.trainable_variables
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(variables)
        # Returns 0.0 if critic is not being used
        value_loss = self._compute_value_loss(
            discounted_rewards, seq_mask=seq_mask, **kwargs)
        policy_loss = self._compute_policy_loss(
            discounted_rewards,
            actions,
            seq_mask=seq_mask,
            weights=weights,
            use_entropy_regularization=use_entropy_regularization,
            **kwargs)
        loss = tf.reduce_mean(policy_loss + value_loss)

    should_log = self.log_summaries and (self._counter % self.log_every == 0)
    if should_log:
        contrib_summary.scalar('{}_loss'.format(loss_str), loss)
    return tape.gradient(loss, self.trainable_variables)
def host_call_fn(gs, g_loss, d_loss, real_audio, generated_audio):
    """Emit loss and audio training summaries from the CPU host.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`; whatever it needs must be
    supplied through `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    The parameters must match, in order, the list of `Tensor` objects given
    as the second element of the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      g_loss: `Tensor` with shape `[batch]` for the generator loss.
      d_loss: `Tensor` with shape `[batch]` for the discriminator loss.
      real_audio: `Tensor` with shape `[batch, 8192, 1]`.
      generated_audio: `Tensor` with shape `[batch, 8192, 1]`.

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = gs[0]
    writer = summary.create_file_writer(FLAGS.model_dir)
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar('g_loss', g_loss, step=step)
        summary.scalar('d_loss', d_loss, step=step)
        summary.audio(
            'real_audio',
            real_audio,
            sample_rate=_FS,
            max_outputs=10,
            step=step)
        summary.audio(
            'generated_audio',
            generated_audio,
            sample_rate=_FS,
            max_outputs=10,
            step=step)
        return summary.all_summary_ops()
def _host_call_fn(gs, loss, lr):
    """Record the training loss and learning rate as host-side scalars.

    Executed on the CPU; it should not directly reference any Tensors in the
    rest of the `model_fn`. Tensors coming from the model have to be provided
    as part of the `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      loss: `Tensor` with shape `[batch]` for the training loss.
      lr: `Tensor` with shape `[batch]` for the learning_rate.

    Returns:
      List of summary ops to run on the CPU host.
    """
    global_step = gs[0]
    # After each TPU loop the host call is executed
    # FLAGS.iterations_per_loop times; a max_queue of the same size makes the
    # summary writer flush to storage just once per loop.
    file_writer = summary.create_file_writer(
        FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop)
    with file_writer.as_default():
        with summary.always_record_summaries():
            summary.scalar("loss", loss[0], step=global_step)
            summary.scalar("learning_rate", lr[0], step=global_step)
            summary_ops = summary.all_summary_ops()
    return summary_ops
def _update_actor(self, obs, mask):
    """Perform one actor update from a sampled batch.

    Args:
      obs: A tfe.Variable with a batch of observations.
      mask: A tfe.Variable with a batch of masks.
    """
    with tf.GradientTape() as tape:
        proposed_action = self.actor(obs)
        if self.use_td3:
            # With TD3 the critic returns a pair; only the first is used.
            q_pred, _ = self.critic(obs, proposed_action)
        else:
            q_pred = self.critic(obs, proposed_action)

        if not self.use_absorbing_state:
            actor_loss = -tf.reduce_mean(q_pred)
        else:
            # Absorbing states must not contribute to the actor update, and
            # the update is skipped entirely when every state is absorbing.
            a_mask = 1.0 - tf.maximum(0, -mask)
            if tf.reduce_sum(a_mask) < 1e-8:
                return
            masked_q_total = tf.reduce_sum(q_pred * a_mask)
            actor_loss = -masked_q_total / tf.reduce_sum(a_mask)

    grads = tape.gradient(actor_loss, self.actor.variables)
    # Clipping makes training more stable.
    grads, _ = tf.clip_by_global_norm(grads, 40.0)
    self.actor_optimizer.apply_gradients(
        zip(grads, self.actor.variables), global_step=self.actor_step)

    every_100 = contrib_summary.record_summaries_every_n_global_steps(
        100, self.actor_step)
    with every_100:
        contrib_summary.scalar(
            'actor/loss', actor_loss, step=self.actor_step)
def host_call_fn(*tensors):
    """Emit one scalar summary per printed tensor from the CPU host.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`; whatever it needs must be
    supplied through `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      *tensors: Tensors to log. The first element of `tensors[0]` is used as
        the summary step; tensor `i` is logged under
        `tensors_to_print_names[i]` (taken from the enclosing scope).

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = tensors[0][0]
    # The host call runs once per iteration of a finished TPU loop; a
    # max_queue equal to the configured iterations_per_loop (1000 when the
    # config has no such entry) lets the writer flush to storage only once
    # per loop.
    queue_size = config.get('iterations_per_loop', 1000)
    summary_writer = summary.create_file_writer(
        config.checkpoint_dir, max_queue=queue_size)
    num_scalars = len(tensors_to_print)
    with summary_writer.as_default(), summary.always_record_summaries():
        for idx in range(num_scalars):
            tag = tensors_to_print_names[idx]
            summary.scalar(tag, tensors[idx][0], step=step)
        return summary.all_summary_ops()
def host_call_fn(global_step, *args):
    """Emit one prefixed scalar summary per metric from the CPU host.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`; whatever it needs must be
    supplied through `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      global_step: `Tensor` with shape `[batch]` for the global_step.
      *args: Remaining tensors to log; `args[i]` is logged under
        `prefix + metric_names[i]` (both taken from the enclosing scope).

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = global_step[0]
    writer = contrib_summary.create_file_writer(
        logdir=model_dir, filename_suffix=".host_call")
    with writer.as_default(), contrib_summary.always_record_summaries():
        for index, metric_name in enumerate(metric_names):
            tag = prefix + metric_name
            contrib_summary.scalar(tag, args[index][0], step=step)
        return contrib_summary.all_summary_ops()
def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor,
                              policy_cost, value_cost, l2_cost,
                              combined_cost,
                              est_mode=tf.estimator.ModeKeys.TRAIN):
    """Build streaming metrics and scalar summaries for them.

    Args:
      policy_output: Policy tensor; reductions are taken over axis 1.
      value_output: Value tensor; its absolute value is averaged.
      pi_tensor: Policy target tensor; its argmax over axis 1 is the label.
      policy_cost: Tensor averaged into the `policy_cost` metric.
      value_cost: Tensor averaged into the `value_cost` metric.
      l2_cost: Tensor averaged into the `l2_cost` metric.
      combined_cost: Tensor averaged into the `combined_cost` metric.
      est_mode: Estimator mode selecting what is returned.

    Returns:
      The dict of metric ops when `est_mode` is EVAL, otherwise the result
      of `summary.all_summary_ops()`.
    """
    per_example_entropy = tf.reduce_sum(
        policy_output * tf.log(policy_output), axis=1)
    policy_entropy = -tf.reduce_mean(per_example_entropy)

    # pi_tensor is one_hot when generated from sgfs (for supervised learning)
    # and soft-max when using self-play records. argmax normalizes the two.
    target_top_1 = tf.argmax(pi_tensor, axis=1)
    output_top_1 = tf.argmax(policy_output, axis=1)

    output_in_top3 = tf.to_float(
        tf.nn.in_top_k(policy_output, target_top_1, k=3))

    top_1_confidence = tf.reduce_max(policy_output, axis=1)
    target_one_hot = tf.one_hot(target_top_1, tf.shape(policy_output)[1])
    target_top_1_confidence = tf.boolean_mask(policy_output, target_one_hot)

    metric_ops = {
        'policy_cost': tf.metrics.mean(policy_cost),
        'value_cost': tf.metrics.mean(value_cost),
        'l2_cost': tf.metrics.mean(l2_cost),
        'policy_entropy': tf.metrics.mean(policy_entropy),
        'combined_cost': tf.metrics.mean(combined_cost),
        'policy_accuracy_top_1': tf.metrics.accuracy(
            labels=target_top_1, predictions=output_top_1),
        'policy_accuracy_top_3': tf.metrics.mean(output_in_top3),
        'policy_top_1_confidence': tf.metrics.mean(top_1_confidence),
        'policy_target_top_1_confidence': tf.metrics.mean(
            target_top_1_confidence),
        'value_confidence': tf.metrics.mean(tf.abs(value_output)),
    }

    # Create summary ops so that they show up in SUMMARIES collection
    # That way, they get logged automatically during training
    summary_writer = summary.create_file_writer(FLAGS.model_dir)
    with summary_writer.as_default():
        with summary.record_summaries_every_n_global_steps(
                FLAGS.summary_steps):
            for tag, value_and_update in metric_ops.items():
                # Index 1 of each metric pair is what gets summarized.
                summary.scalar(tag, value_and_update[1])

    if est_mode == tf.estimator.ModeKeys.EVAL:
        return metric_ops
    return summary.all_summary_ops()
def host_call_fn(**kwargs):
    """Write the mean of every keyword tensor as a scalar summary.

    Args:
      **kwargs: Mapping from summary tag to the tensor whose mean is logged.

    Returns:
      The result of `contrib_summary.all_summary_ops()`.
    """
    writer = contrib_summary.create_file_writer(summary_dir, max_queue=1000)
    always_record = contrib_summary.always_record_summaries()
    with writer.as_default():
        with always_record:
            for tag, values in kwargs.items():
                averaged = tf.reduce_mean(values)
                contrib_summary.scalar(tag, averaged)
            return contrib_summary.all_summary_ops()
def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor,
                              policy_cost, value_cost, l2_cost,
                              combined_cost, step,
                              est_mode=tf.estimator.ModeKeys.TRAIN):
    """Build streaming metrics and, outside EVAL mode, their summaries.

    Args:
      policy_output: Policy tensor; reductions are taken over axis 1.
      value_output: Value tensor; its absolute value is averaged.
      pi_tensor: Policy target tensor; its argmax over axis 1 is the label.
      policy_cost: Tensor averaged into the `policy_cost` metric.
      value_cost: Tensor averaged into the `value_cost` metric.
      l2_cost: Tensor averaged into the `l2_cost` metric.
      combined_cost: Tensor averaged into the `combined_cost` metric.
      step: Step tensor; its minimum is used as the summary step.
      est_mode: Estimator mode selecting what is returned.

    Returns:
      The dict of metric ops when `est_mode` is EVAL; otherwise the list of
      all summary ops followed by the conditional metric-reset op.
    """
    per_example_entropy = tf.reduce_sum(
        policy_output * tf.log(policy_output), axis=1)
    policy_entropy = -tf.reduce_mean(per_example_entropy)

    # pi_tensor is one_hot when generated from sgfs (for supervised learning)
    # and soft-max when using self-play records. argmax normalizes the two.
    target_top_1 = tf.argmax(pi_tensor, axis=1)

    output_in_top1 = tf.to_float(
        tf.nn.in_top_k(policy_output, target_top_1, k=1))
    output_in_top3 = tf.to_float(
        tf.nn.in_top_k(policy_output, target_top_1, k=3))

    top_1_confidence = tf.reduce_max(policy_output, axis=1)
    target_one_hot = tf.one_hot(target_top_1, tf.shape(policy_output)[1])
    target_top_1_confidence = tf.boolean_mask(policy_output, target_one_hot)

    with tf.variable_scope("metrics"):
        metric_ops = {
            'policy_cost': tf.metrics.mean(policy_cost),
            'value_cost': tf.metrics.mean(value_cost),
            'l2_cost': tf.metrics.mean(l2_cost),
            'policy_entropy': tf.metrics.mean(policy_entropy),
            'combined_cost': tf.metrics.mean(combined_cost),
            'policy_accuracy_top_1': tf.metrics.mean(output_in_top1),
            'policy_accuracy_top_3': tf.metrics.mean(output_in_top3),
            'policy_top_1_confidence': tf.metrics.mean(top_1_confidence),
            'policy_target_top_1_confidence': tf.metrics.mean(
                target_top_1_confidence),
            'value_confidence': tf.metrics.mean(tf.abs(value_output)),
        }

    if est_mode == tf.estimator.ModeKeys.EVAL:
        return metric_ops

    # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps.
    eval_step = tf.reduce_min(step)
    summary_every = params['summary_steps']

    # Create summary ops so that they show up in SUMMARIES collection
    # That way, they get logged automatically during training
    summary_writer = summary.create_file_writer(FLAGS.work_dir)
    with summary_writer.as_default():
        with summary.record_summaries_every_n_global_steps(
                summary_every, eval_step):
            for tag, value_and_update in metric_ops.items():
                summary.scalar(tag, value_and_update[1], step=eval_step)

    # Reset metrics occasionally so that they are mean of recent batches.
    reset_op = tf.variables_initializer(tf.local_variables("metrics"))
    is_reset_step = tf.equal(
        eval_step % params['summary_steps'], tf.to_int64(1))
    cond_reset_op = tf.cond(
        is_reset_step,
        lambda: reset_op,
        lambda: tf.no_op())

    return summary.all_summary_ops() + [cond_reset_op]
def host_call_fn(gs, *summary_tensors):
    """Write the mean of each summary tensor as a scalar on the host.

    Args:
      gs: Global-step tensor; its first element is the summary step.
      *summary_tensors: Tensors paired, in order, with `summary_names` from
        the enclosing scope.

    Returns:
      The result of `contrib_summary.all_summary_ops()`.
    """
    step = gs[0]
    writer = contrib_summary.create_file_writer(FLAGS.workdir)
    with writer.as_default(), contrib_summary.always_record_summaries():
        named_tensors = zip(summary_names, summary_tensors)
        for tag, tensor in named_tensors:
            contrib_summary.scalar(tag, tf.reduce_mean(tensor), step=step)
        return contrib_summary.all_summary_ops()
def host_call_fn(gs, lr):
    """Write the mean learning rate as a scalar summary on the host.

    Args:
      gs: Global-step tensor; its mean, cast to int64, is the summary step.
      lr: Learning-rate tensor; its mean is the logged value.

    Returns:
      The result of `summary.all_summary_ops()`.
    """
    # Outfeed supports int32 but global_step is expected to be int64.
    step = tf.cast(tf.reduce_mean(gs), tf.int64)
    writer = summary.create_file_writer(self.model_dir)
    with writer.as_default(), summary.always_record_summaries():
        mean_lr = tf.reduce_mean(lr)
        summary.scalar('learning_rate', mean_lr, step=step)
        return summary.all_summary_ops()
def _evaluate_eager(self, data_name):
    """Evaluate the model eagerly on one of the stored datasets.

    Args:
      data_name: Key into `self.data` selecting the dataset to iterate.

    Returns:
      Tuple `(avg_loss, avg_accuracy)` taken from the two mean metrics.
    """
    mean_loss = tfe.metrics.Mean('loss')
    mean_accuracy = tfe.metrics.Mean('accuracy')

    batches = tfe.Iterator(self.data[data_name])
    for features, labels in batches:
        logits = self.model(features, training=False)
        mean_loss(self.loss(labels, logits))
        mean_accuracy(self.accuracy(labels, logits))

    avg_loss = mean_loss.result()
    avg_accuracy = mean_accuracy.result()
    summary.scalar('loss', avg_loss)
    summary.scalar('accuracy', avg_accuracy)
    return avg_loss, avg_accuracy
def host_call_fn(global_step, *tensors):
    """Write the first element of each tensor as a scalar on the host.

    Args:
      global_step: Tensor whose first element is used as the summary step.
      *tensors: Tensors to log; tensor `i` is tagged `names[i]` (`names`
        comes from the enclosing scope).

    Returns:
      The result of `contrib_summary.all_summary_ops()`.
    """
    step = global_step[0]
    metrics_dir = summary_dir + '/metrics'
    writer = contrib_summary.create_file_writer(metrics_dir)
    with writer.as_default(), contrib_summary.always_record_summaries():
        for position, tensor in enumerate(tensors):
            contrib_summary.scalar(names[position], tensor[0], step=step)
        return contrib_summary.all_summary_ops()
def _train_eager_one_epoch(self):
    """Train eagerly for one pass over `self.data['train']`.

    Each batch is run forward under a gradient tape, the optimizer is
    applied to `self.model.variables`, and the batch loss and accuracy are
    written as scalar summaries.
    """
    train_batches = tfe.Iterator(self.data['train'])
    for _, (features, labels) in enumerate(train_batches):
        with tfe.GradientTape() as tape:
            logits = self.model(features, training=True)
            train_loss = self.loss(labels, logits)
            train_accuracy = self.accuracy(labels, logits)

        model_vars = self.model.variables
        grads = tape.gradient(train_loss, model_vars)
        self.optimizer.apply_gradients(
            zip(grads, self.model.variables),
            global_step=self.step_counter)

        summary.scalar('loss', train_loss)
        summary.scalar('accuracy', train_accuracy)
def _host_loss_summary(global_step, tf_loss, **scalars):
    """Add summary.scalar in host side.

    Writes `tf_loss` under the tag `<train_or_eval>_loss` and each extra
    keyword tensor under `<train_or_eval>_<name>` (`train_or_eval` comes
    from the enclosing scope).

    Args:
      global_step: Step value; cast to int64 before being used as the
        summary step.
      tf_loss: Loss tensor to log and to return.
      **scalars: Additional named tensors to log.

    Returns:
      `tf.identity(tf_loss)` created under control dependencies on all the
      summary ops, so evaluating it also runs the summaries.
    """
    gs = tf.cast(global_step, tf.int64)
    sum_loss = contrib_summary.scalar(
        '{}_loss'.format(train_or_eval), tf_loss, step=gs)
    sum_ops = [sum_loss.op]
    # `dict.items()` exists on both Python 2 and 3; `iteritems()` is
    # Python 2 only and raises AttributeError on Python 3.
    for description, tf_metric in scalars.items():
        sum_metric = contrib_summary.scalar(
            '{}_{}'.format(train_or_eval, description), tf_metric, step=gs)
        sum_ops.append(sum_metric)
    with tf.control_dependencies(sum_ops):
        return tf.identity(tf_loss)
def host_call_fn(global_step, *tensors):
    """Periodically write non-image tensors as scalar summaries on the host.

    Args:
      global_step: Tensor whose first element is used as the summary step
        and as the counter for the every-n-steps recorder.
      *tensors: Tensors to log; tensor `i` is tagged `names[i]` (`names`
        comes from the enclosing scope). Entries whose name contains
        'images' are skipped.

    Returns:
      The result of `contrib_summary.all_summary_ops()`.
    """
    step = global_step[0]
    writer = contrib_summary.create_file_writer(params.output_dir)
    with writer.as_default():
        recorder = contrib_summary.record_summaries_every_n_global_steps(
            n=params.log_every, global_step=step)
        with recorder:
            for position, tensor in enumerate(tensors):
                if 'images' in names[position]:
                    continue
                contrib_summary.scalar(
                    names[position], tensor[0], step=step)
            return contrib_summary.all_summary_ops()
def update(self, samples, contexts, dev_samples, dev_contexts):
    """Run one policy update followed by one meta update of the score vars.

    The training rewards are scaled by softmax-normalised scores, the policy
    variables are updated with the resulting clipped gradients, and the
    score variables are then updated by differentiating the post-update
    variable values (`new_vars`) with the dev-batch gradients supplied as
    `output_gradients`.

    Args:
      samples: Training samples; each must expose a `traj` attribute.
      contexts: Contexts passed to `create_batch` for `samples`.
      dev_samples: Dev samples passed to `create_batch`.
      dev_contexts: Contexts passed to `create_batch` for `dev_samples`.
    """
    if self._counter % 20 == 0:
        # To prevent memory leaks in tf eager
        tf.set_random_seed(self._seed)
    actions, rews, weights, kwargs = self.create_batch(
        samples, contexts=contexts)
    dev_actions, dev_rews, dev_weights, dev_kwargs = self.create_batch(
        dev_samples, contexts=dev_contexts)
    trajs = (s.traj for s in samples)
    with tf.GradientTape(
            watch_accessed_variables=False, persistent=True) as tape0:
        tape0.watch(self._score_vars)
        scores = self.compute_scores(trajs, return_tensors=True)
        # Scores are normalised separately inside each of the
        # len(actions) // 10 equal splits.
        scores = [
            tf.nn.softmax(x)
            for x in tf.split(scores, len(actions) // 10, axis=0)
        ]
        scores = tf.concat(scores, axis=0)
        rews = rews * tf.expand_dims(scores, axis=-1)
        grads = self._compute_gradients(actions, rews, weights, **kwargs)
        grads, _ = tf.clip_by_global_norm(grads, self.max_grad_norm)
        # Materialize the pairs: they are iterated here, again by
        # `apply_gradients`, and again for logging. A bare Python 3 `zip`
        # iterator would be exhausted after the first pass.
        grads_and_vars = list(zip(grads, self.trainable_variables))
        new_vars = [v - self.learning_rate * g for g, v in grads_and_vars]
    self.optimizer.apply_gradients(grads_and_vars)
    grads_loss = self._compute_gradients(
        dev_actions,
        dev_rews,
        dev_weights,
        loss_str='dev',
        use_entropy_regularization=False,
        **dev_kwargs)
    score_grads = tape0.gradient(
        new_vars, self._score_vars, output_gradients=grads_loss)
    # The persistent tape is released explicitly once it is no longer needed.
    del tape0
    # Also kept as a list so it can be both applied and logged.
    score_grads_and_vars = list(self._score_grad_clipping(
        list(zip(score_grads, self._score_vars))))
    self.score_optimizer.apply_gradients(
        score_grads_and_vars, global_step=self.global_step)
    if self.log_summaries:
        grads = [g for g, _ in grads_and_vars]
        score_grads = [g for g, _ in score_grads_and_vars]
        contrib_summary.scalar('global_norm/train_grad',
                               tf.global_norm(grads))
        contrib_summary.scalar('global_norm/meta_grad',
                               tf.global_norm(score_grads))
    if self._debug and (self._counter % self.log_every == 0):
        tf.print(
            'Epoch {} scores='.format(self._counter),
            scores[:20],
            summarize=10,
            output_stream=sys.stdout)
    self._counter += 1
def host_call_fn(gs, loss, lr, mix=None, gt_sources=None, est_sources=None):
    """Training host call. Creates scalar and audio summaries for training.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`. To pass Tensors from the model
    to the `metric_fn`, provide as part of the `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step
      loss: `Tensor` with shape `[batch]` for the training loss.
      lr: `Tensor` with shape `[batch]` for the learning_rate.
      mix: `Tensor` with shape `[batch, mix_samples, 1]`
      gt_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
      est_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`

    Returns:
      List of summary ops to run on the CPU host.
    """
    gs = gs[0]
    # The log directory is <model_base_dir>/<experiment_id>.
    with summary.create_file_writer(
            model_config["model_base_dir"] + os.path.sep +
            str(model_config["experiment_id"])).as_default():
        with summary.always_record_summaries():
            summary.scalar('loss', loss[0], step=gs)
            summary.scalar('learning_rate', lr[0], step=gs)
        # NOTE(review): `gs` is obtained by indexing a Tensor, so this is a
        # Python-level test on a Tensor expression - confirm it gates the
        # audio summaries per step as intended in the TF version in use.
        # NOTE(review): `mix`, `gt_sources` and `est_sources` default to None
        # but are used unconditionally inside this branch - verify callers
        # always supply them.
        if gs % 10000 == 0:
            with summary.record_summaries_every_n_global_steps(
                    model_config["audio_summaries_every_n_steps"]):
                summary.audio('mix', mix, model_config['expected_sr'],
                              max_outputs=model_config["num_sources"])
                # One ground-truth / estimate pair per index along axis 1.
                for source_id in range(gt_sources.shape[1].value):
                    summary.audio('gt_sources_{source_id}'.format(
                        source_id=source_id),
                                  gt_sources[:, source_id, :, :],
                                  model_config['expected_sr'],
                                  max_outputs=model_config["num_sources"])
                    summary.audio('est_sources_{source_id}'.format(
                        source_id=source_id),
                                  est_sources[:, source_id, :, :],
                                  model_config['expected_sr'],
                                  max_outputs=model_config["num_sources"])
        return summary.all_summary_ops()
def _setup_graph_training(self, accuracy, loss, optimizer):
    """Build the graph-mode training ops and their summaries.

    Creates a reinitializable iterator shared by the 'train', 'val' and
    'test' datasets, wires the model, loss and accuracy onto its output,
    registers train/validation scalar summaries and stores the minimize op.

    Args:
      accuracy: Callable invoked as `accuracy(labels, logits)`.
      loss: Callable invoked as `loss(labels, logits)`.
      optimizer: Object whose `minimize` is called on the loss.
    """
    train_data = self.data['train']
    data_types = train_data.output_types
    data_shapes = train_data.output_shapes
    self._iterator = tf.data.Iterator.from_structure(
        data_types, data_shapes)
    self._inits = {
        split: self._iterator.make_initializer(self.data[split])
        for split in ('train', 'val', 'test')
    }

    features, labels = self._iterator.get_next()
    training = tf.placeholder(tf.bool, name='training')
    self.logits = self.model(features, training=training)
    self.accuracy = accuracy(labels, self.logits)
    self.loss = loss(labels, self.logits)

    with self.summary.as_default():
        with summary.always_record_summaries():
            summary.scalar('train_loss', self.loss)
            summary.scalar('train_accuracy', self.accuracy)

    # Validation values are supplied through named placeholders.
    with self.test_summary.as_default():
        with summary.always_record_summaries():
            avg_loss_ph = tf.placeholder(tf.float32, name='avg_loss')
            summary.scalar('val_loss', avg_loss_ph)
            avg_accuracy_ph = tf.placeholder(tf.float32, name='avg_accuracy')
            summary.scalar('val_accuracy', avg_accuracy_ph)

    # Needed for BatchNorm to work
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        self.optimizer = optimizer.minimize(
            self.loss, global_step=self.step_counter)
def host_call_fn(*args):
    """Host call function to compute training summaries.

    Args:
      *args: Tensors matched against the keys of `scalars_to_summarize`
        (from the enclosing scope) by `_list_to_dicts`; the entry named
        "global_step" supplies the summary step.

    Returns:
      The result of `contrib_summary.all_summary_ops()`.
    """
    scalars = _list_to_dicts(args, scalars_to_summarize.keys())[0]
    # Each entry is replaced in place by its first element.
    for key in scalars:
        scalars[key] = scalars[key][0]

    writer = contrib_summary.create_file_writer(summary_dir, max_queue=1000)
    with writer.as_default(), contrib_summary.always_record_summaries():
        for tag, value in scalars.items():
            if tag == "global_step":
                continue
            contrib_summary.scalar(tag, value, step=scalars["global_step"])
        return contrib_summary.all_summary_ops()
def host_call_fn(gs, lr):
    """Record the learning rate as a host-side scalar summary.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      lr: `Tensor` with shape `[batch]` for the learning_rate.

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = gs[0]
    writer = summary.create_file_writer(params['model_dir'])
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar('learning_rate', lr[0], step=step)
        return summary.all_summary_ops()
def host_call_fn(model_dir, **kwargs):
    """host_call function used for creating training summaries when using TPU.

    Args:
      model_dir: String indicating the output_dir to save summaries in.
      **kwargs: Set of metric names and tensor values for all desired
        summaries. Must contain a 'global_step' entry, whose first element
        is used as the summary step; the first element of every other
        tensor is logged under its keyword name.

    Returns:
      Summary op to be passed to the host_call arg of the estimator function.
    """
    gs = kwargs.pop('global_step')[0]
    with summary.create_file_writer(model_dir).as_default():
        with summary.always_record_summaries():
            # `dict.items()` exists on both Python 2 and 3; `iteritems()` is
            # Python 2 only and raises AttributeError on Python 3.
            for name, tensor in kwargs.items():
                summary.scalar(name, tensor[0], step=gs)
            return summary.all_summary_ops()
def test(model, dataset):
    """Evaluate `model` on every example yielded by `dataset`.

    Prints the average loss and accuracy and writes both as scalar
    summaries.

    Args:
      model: Callable invoked as `model(images, training=False)`.
      dataset: Iterable yielding `(images, labels)` pairs.
    """
    from tensorflow.contrib import summary as contrib_summary  # pylint: disable=g-import-not-at-top

    loss_metric = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    accuracy_metric = tf.keras.metrics.Accuracy('accuracy', dtype=tf.float32)

    for images, labels in dataset:
        logits = model(images, training=False)
        loss_metric.update_state(loss(logits, labels))
        predicted = tf.argmax(logits, axis=1, output_type=tf.int64)
        accuracy_metric.update_state(predicted, tf.cast(labels, tf.int64))

    print('Test set: Average loss: %.4f, Accuracy: %4f%%\n' %
          (loss_metric.result(), 100 * accuracy_metric.result()))
    with contrib_summary.always_record_summaries():
        contrib_summary.scalar('loss', loss_metric.result())
        contrib_summary.scalar('accuracy', accuracy_metric.result())
def reconstruction_loss(self, ignore_values={}, summaries=False):
    """Compute the distribution and value reconstruction losses.

    Args:
      ignore_values: Container of node-type ids whose value loss is skipped.
      summaries: Not read anywhere in this method.

    Returns:
      Tuple `(d_loss, v_loss)`: the mean sparse cross-entropy over the
      stacked distributions, and the sum of the per-node-type value losses
      (0 when no node type contributes).
    """
    value_gt = {
        node_type.id: []
        for node_type in self.tree_def.node_types
        if node_type.value_type is not None
    }
    value = {
        node_type.id: []
        for node_type in self.tree_def.node_types
        if node_type.value_type is not None
    }

    # first gather all the tensors and then compute the loss is ~3.5 time faster
    def gather(node):
        if node.value is not None:
            type_id = node.node_type_id
            value[type_id].append(node.value.index)
            value_gt[type_id].append(node.tr_gt_value.abstract_value)
        for child in node.children:
            gather(child)

    for tree in self.decoded_trees:
        gather(tree)

    sample_distrib_error = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.get_stacked('distribs_idx'),
        logits=self.get_stacked('distribs_unscaled'))
    d_loss = tf.reduce_mean(sample_distrib_error)

    v_loss = 0
    for k, indices in value.items():
        if not indices or k in ignore_values:
            continue

        vt = self.tree_def.id_map[k].value_type
        all_value_gt = vt.abstract_to_representation_batch(value_gt[k])
        all_value_gen = tf.gather(self['vals_' + k], indices)

        if vt.class_value:
            vk_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=all_value_gt, logits=all_value_gen)
        else:
            squared_error = tf.square(all_value_gen - all_value_gt)
            vk_loss = tf.reduce_mean(squared_error, axis=-1)

        reduced = tf.reduce_mean(vk_loss, axis=-1)
        tfs.scalar("loss/tr/values/" + k, reduced)
        v_loss += reduced

    # TODO handle the mixing of losses from different domain (i.e. properly weight them)
    return d_loss, v_loss
def summary(self, inputs, outputs, losses, step):
    """Record image and loss summaries on top of the parent's summaries.

    Args:
      inputs: Mapping read at keys 'rgb' and 'map'.
      outputs: Mapping read at key 'map'.
      losses: Mapping read at key 'loss'.
      step: Step value passed to each summary; `step.numpy()` is used to
        decide when images are written.
    """
    super().summary(inputs, outputs, losses, step)
    with summary.always_record_summaries():
        # Images are only written on steps divisible by 20.
        if step.numpy() % 20 == 0:
            summary.image('summary/tube', inputs['rgb'], max_images=1, step=step)
            summary.image('summary/map', inputs['map'], max_images=1, step=step)
            summary.image('summary/output', outputs['map'], max_images=1, step=step)
        # NOTE(review): assumed to run on every step rather than only inside
        # the branch above - confirm against the original indentation.
        summary.scalar('summary/loss', losses['loss'], step=step)
def host_call_fn(gs, loss, lr, ce):
    """Emit batch-averaged training scalars from the CPU host.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`; whatever it needs must be
    supplied through `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      loss: `Tensor` with shape `[batch]` for the training loss.
      lr: `Tensor` with shape `[batch]` for the learning_rate.
      ce: `Tensor` with shape `[batch]` for the current_epoch.

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = gs[0]
    writer = summary.create_file_writer(FLAGS.model_dir)
    with writer.as_default(), summary.always_record_summaries():
        # Each value is reduced to its mean before being logged.
        summary.scalar('loss', tf.reduce_mean(loss), step=step)
        summary.scalar('learning_rate', tf.reduce_mean(lr), step=step)
        summary.scalar('current_epoch', tf.reduce_mean(ce), step=step)
        return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, ce):
    """Emit training scalars from the CPU host.

    This function is executed on the CPU and should not directly reference
    any Tensors in the rest of the `model_fn`; whatever it needs must be
    supplied through `host_call`. See
    https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
    for more information.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      gs: `Tensor` with shape `[batch]` for the global_step.
      loss: `Tensor` with shape `[batch]` for the training loss.
      lr: `Tensor` with shape `[batch]` for the learning_rate.
      ce: `Tensor` with shape `[batch]` for the current_epoch.

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = gs[0]
    writer = summary.create_file_writer(FLAGS.model_dir)
    with writer.as_default(), summary.always_record_summaries():
        # The first element of each tensor is the value that gets logged.
        summary.scalar('loss', loss[0], step=step)
        summary.scalar('learning_rate', lr[0], step=step)
        summary.scalar('current_epoch', ce[0], step=step)
        return summary.all_summary_ops()
def host_call_fn(gs, loss, acc, ce):
    """Emit loss, top-1 accuracy and epoch scalars from the CPU host.

    This function is executed on the CPU: after `iterations_per_loop`
    computation on the TPU, control moves to the CPU where the summaries are
    updated.

    Arguments should match the list of `Tensor` objects passed as the second
    element in the tuple passed to `host_call`.

    Args:
      gs: Tensor with shape [batch] for global step.
      loss: Tensor with shape [batch] for the training loss.
      acc: Tensor whose first element is logged under the tag 'top_1'.
      ce: Tensor with shape [batch] for the current epoch.

    Returns:
      List of summary ops to run on the CPU host.
    """
    step = gs[0]
    # The host call runs FLAGS.iterations_per_loop times once a TPU loop has
    # finished, so sizing the writer queue to that count lets the writer
    # flush to storage only once per loop.
    writer = summary.create_file_writer(
        FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop)
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar('loss', loss[0], step=step)
        summary.scalar('top_1', acc[0], step=step)
        summary.scalar('current_epoch', ce[0], step=step)
        return summary.all_summary_ops()
def host_call_fn(global_step, total_loss, cross_entropy,
                 regularization_loss, learning_rate, train_accuracy):
    """Write the training losses, learning rate and accuracy as scalars.

    The first element of every tensor is the value that gets logged.

    Args:
      global_step: Tensor whose first element is the summary step.
      total_loss: Tensor logged under 'entire_loss'.
      cross_entropy: Tensor logged under 'cross_entropy_loss'.
      regularization_loss: Tensor logged under 'regularization_loss'.
      learning_rate: Tensor logged under 'learning_rate'.
      train_accuracy: Tensor logged under 'train_accuracy'.

    Returns:
      The result of `summary.all_summary_ops()`.
    """
    step = global_step[0]
    writer = summary.create_file_writer(
        params['model_dir'], max_queue=params['iterations_per_loop'])
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar('entire_loss', total_loss[0], step=step)
        summary.scalar('cross_entropy_loss', cross_entropy[0], step=step)
        summary.scalar(
            'regularization_loss', regularization_loss[0], step=step)
        summary.scalar('learning_rate', learning_rate[0], step=step)
        summary.scalar('train_accuracy', train_accuracy[0], step=step)
        return summary.all_summary_ops()
def host_call_fn(gs, lpl, dcl, ls):
    """Write the three training losses as host-side scalar summaries.

    The first element of every tensor is the value that gets logged.

    Args:
      gs: Tensor whose first element is the summary step.
      lpl: Tensor logged under 'label_prediction_loss'.
      dcl: Tensor logged under 'domain_classification_loss'.
      ls: Tensor logged under 'loss'.

    Returns:
      The result of `summary.all_summary_ops()`.
    """
    step = gs[0]
    writer = summary.create_file_writer(
        params['model_dir'], max_queue=params['save_checkpoints_steps'])
    with writer.as_default(), summary.always_record_summaries():
        summary.scalar('label_prediction_loss', lpl[0], step=step)
        summary.scalar('domain_classification_loss', dcl[0], step=step)
        summary.scalar('loss', ls[0], step=step)
        return summary.all_summary_ops()