def host_call_fn(global_step, *args):
  """Training host call: writes scalar summaries for training metrics.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly — they arrive via `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the list of `Tensor` objects
  passed as the second element of the `host_call` tuple.

  Args:
    global_step: `Tensor` with shape `[batch]` for the global_step.
    *args: Remaining `[batch]`-shaped tensors to log, one per metric name.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = global_step[0]
  writer = contrib_summary.create_file_writer(
      logdir=model_dir, filename_suffix=".host_call")
  with writer.as_default():
    with contrib_summary.always_record_summaries():
      # Pair each configured metric name with its corresponding tensor.
      for name, tensor in zip(metric_names, args):
        contrib_summary.scalar(prefix + name, tensor[0], step=step)
      return contrib_summary.all_summary_ops()
def host_call_fn(*tensors):
  """Training host call: emits scalar summaries for the tracked tensors.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly — they are delivered through `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the list of `Tensor` objects
  passed as the second element of the `host_call` tuple.
  """
  gs = tensors[0][0]
  # The host call fn runs params['iterations_per_loop'] times after each
  # TPU loop finishes; setting max_queue to that count makes the summary
  # writer flush to storage only once per loop.
  summary_writer = summary.create_file_writer(
      config.checkpoint_dir,
      max_queue=config.get('iterations_per_loop', 1000))
  with summary_writer.as_default():
    with summary.always_record_summaries():
      for idx, _ in enumerate(tensors_to_print):
        summary.scalar(tensors_to_print_names[idx], tensors[idx][0], step=gs)
      return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, ce):
  """Training host call: writes scalar summaries for training metrics.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the `Tensor` list passed as the
  second element of the `host_call` tuple.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.
    ce: `Tensor` with shape `[batch]` for the current_epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  with summary.create_file_writer(FLAGS.model_dir).as_default():
    with summary.always_record_summaries():
      # Only the first element of each per-replica tensor is logged.
      for tag, tensor in (('loss', loss), ('learning_rate', lr),
                          ('current_epoch', ce)):
        summary.scalar(tag, tensor[0], step=step)
      return summary.all_summary_ops()
def host_call_fn(gs, g_loss, d_loss, real_audio, generated_audio):
  """Training host call. Creates scalar and audio summaries.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments should match the list of `Tensor`
  objects passed as the second element in the tuple passed to `host_call`.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    g_loss: `Tensor` with shape `[batch]` for the generator loss.
    d_loss: `Tensor` with shape `[batch]` for the discriminator loss.
    real_audio: `Tensor` with shape `[batch, 8192, 1]`.
    generated_audio: `Tensor` with shape `[batch, 8192, 1]`.

  Returns:
    List of summary ops to run on the CPU host.
  """
  gs = gs[0]
  with summary.create_file_writer(FLAGS.model_dir).as_default():
    with summary.always_record_summaries():
      # BUGFIX: the losses arrive with shape [batch] (see docstring), but
      # scalar summaries need a rank-0 value — index the first element,
      # matching how `gs` is handled above.
      summary.scalar('g_loss', g_loss[0], step=gs)
      summary.scalar('d_loss', d_loss[0], step=gs)
      summary.audio('real_audio', real_audio, sample_rate=_FS,
                    max_outputs=10, step=gs)
      summary.audio('generated_audio', generated_audio, sample_rate=_FS,
                    max_outputs=10, step=gs)
      return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, ce):
  """Training host call: writes scalar summaries for training metrics.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the `Tensor` list passed as the
  second element of the `host_call` tuple.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.
    ce: `Tensor` with shape `[batch]` for the current_epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  # The host call runs FLAGS.iterations_per_loop times after each TPU
  # loop; a max_queue of the same size means the writer flushes to
  # storage only once per loop.
  writer = summary.create_file_writer(model_dir, max_queue=iterations_per_loop)
  with writer.as_default():
    with summary.always_record_summaries():
      summary.scalar('loss', loss[0], step=step)
      summary.scalar('learning_rate', lr[0], step=step)
      summary.scalar('current_epoch', ce[0], step=step)
      return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, ce):
  """Training host call: writes per-loop means as scalar summaries.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the `Tensor` list passed as the
  second element of the `host_call` tuple.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.
    ce: `Tensor` with shape `[batch]` for the current_epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  with summary.create_file_writer(FLAGS.model_dir).as_default():
    with summary.always_record_summaries():
      # Unlike sibling host calls that log element [0], this one reduces
      # across the batch dimension before logging.
      for tag, tensor in (('loss', loss), ('learning_rate', lr),
                          ('current_epoch', ce)):
        summary.scalar(tag, tf.reduce_mean(tensor), step=step)
      return summary.all_summary_ops()
def _host_call_fn(gs, loss, lr):
  """Training host call: writes scalar summaries for training metrics.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the `Tensor` list passed as the
  second element of the `host_call` tuple.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.

  Returns:
    List of summary ops to run on the CPU host.
  """
  # The host call runs FLAGS.iterations_per_loop times after one TPU loop
  # is finished; using that value as max_queue makes the summary writer
  # flush to storage only once per loop.
  step = gs[0]
  writer = summary.create_file_writer(
      FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop)
  with writer.as_default():
    with summary.always_record_summaries():
      summary.scalar("loss", loss[0], step=step)
      summary.scalar("learning_rate", lr[0], step=step)
      return summary.all_summary_ops()
def _host_call(
    concat_activations: tf.Tensor,
    concat_sequence_lengths: tf.Tensor,
) -> List[tf.Operation]:
  """Writes the activations and sequence lengths to a summary file.

  TPUEstimator concatenates the per-core minibatch activations and
  sequence lengths along axis=0 and hands the result to this host call,
  which serializes both via the TF summary APIs.

  Args:
    concat_activations: The activations for the global batch.
      2D Tensor(type=float32, shape=[batch_size, max_sequence_length]).
    concat_sequence_lengths: The sequence lengths for the global batch.
      2D Tensor(type=int64, shape=[batch_size, max_sequence_length]).

  Returns:
    A list of summary ops for TPUEstimator to run on the host.
  """
  writer = contrib_summary.create_file_writer(self._summary_dir)
  with writer.as_default():
    with contrib_summary.always_record_summaries():
      # `generic` stores raw tensor payloads rather than scalars.
      contrib_summary.generic(self._SUMMARY_ACTIVATIONS, concat_activations)
      contrib_summary.generic(self._SUMMARY_SEQUENCE_LENGTHS,
                              concat_sequence_lengths)
      return contrib_summary.all_summary_ops()
def host_call_fn(gs, loss, acc, ce):
  """Training host call: writes scalar summaries for training metrics.

  Executed on the CPU: after `iterations_per_loop` steps of TPU
  computation, control returns to the CPU host where the summaries are
  updated. Arguments match the `Tensor` list passed as the second
  element of the `host_call` tuple.

  Args:
    gs: `Tensor` with shape `[batch]` for the global step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    acc: `Tensor` with shape `[batch]` for the top-1 accuracy.
    ce: `Tensor` with shape `[batch]` for the current epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  # The host call runs FLAGS.iterations_per_loop times after each TPU
  # loop; a max_queue of the same size makes the summary writer flush
  # to storage only once per loop.
  writer = summary.create_file_writer(
      FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop)
  with writer.as_default():
    with summary.always_record_summaries():
      summary.scalar('loss', loss[0], step=step)
      summary.scalar('top_1', acc[0], step=step)
      summary.scalar('current_epoch', ce[0], step=step)
      return summary.all_summary_ops()
def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost, value_cost, l2_cost, combined_cost, est_mode=tf.estimator.ModeKeys.TRAIN): policy_entropy = -tf.reduce_mean( tf.reduce_sum(policy_output * tf.log(policy_output), axis=1)) # pi_tensor is one_hot when generated from sgfs (for supervised learning) # and soft-max when using self-play records. argmax normalizes the two. policy_target_top_1 = tf.argmax(pi_tensor, axis=1) policy_output_top_1 = tf.argmax(policy_output, axis=1) policy_output_in_top3 = tf.to_float( tf.nn.in_top_k(policy_output, policy_target_top_1, k=3)) policy_top_1_confidence = tf.reduce_max(policy_output, axis=1) policy_target_top_1_confidence = tf.boolean_mask( policy_output, tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1])) metric_ops = { 'policy_cost': tf.metrics.mean(policy_cost), 'value_cost': tf.metrics.mean(value_cost), 'l2_cost': tf.metrics.mean(l2_cost), 'policy_entropy': tf.metrics.mean(policy_entropy), 'combined_cost': tf.metrics.mean(combined_cost), 'policy_accuracy_top_1': tf.metrics.accuracy(labels=policy_target_top_1, predictions=policy_output_top_1), 'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3), 'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence), 'policy_target_top_1_confidence': tf.metrics.mean(policy_target_top_1_confidence), 'value_confidence': tf.metrics.mean(tf.abs(value_output)), } # Create summary ops so that they show up in SUMMARIES collection # That way, they get logged automatically during training summary_writer = summary.create_file_writer(FLAGS.model_dir) with summary_writer.as_default(), \ summary.record_summaries_every_n_global_steps(FLAGS.summary_steps): for metric_name, metric_op in metric_ops.items(): summary.scalar(metric_name, metric_op[1]) if est_mode == tf.estimator.ModeKeys.EVAL: return metric_ops return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, ce, bi_list, bo_list, big_list, bog_list):
  """Training host call. Creates scalar summaries for training metrics.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments should match the list of `Tensor`
  objects passed as the second element in the tuple passed to `host_call`.

  Args:
    gs: `Tensor with shape `[batch]` for the global_step
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.
    ce: `Tensor` with shape `[batch]` for the current_epoch.
    bi_list: batch-norm input activations — presumably one tensor per BN
      layer; TODO confirm against the caller.
    bo_list: batch-norm output activations (same layout assumption).
    big_list: gradients w.r.t. batch-norm inputs (same layout assumption).
    bog_list: gradients w.r.t. batch-norm outputs (same layout assumption).

  Returns:
    List of summary ops to run on the CPU host.
  """
  gs = gs[0]
  # Host call fns are executed params['iterations_per_loop'] times after
  # one TPU loop is finished, setting max_queue value to the same as
  # number of iterations will make the summary writer only flush the data
  # to storage once per loop.
  with summary.create_file_writer(
      FLAGS.model_dir,
      max_queue=params['iterations_per_loop']).as_default():
    with summary.always_record_summaries():
      summary.scalar('loss', loss[0], step=gs)
      summary.scalar('learning_rate', lr[0], step=gs)
      summary.scalar('current_epoch', ce[0], step=gs)
      # TODO record distribution every 1251 steps (steps per epoch)
      # Histograms are expensive, so they're only recorded every
      # FLAGS.steps_per_eval global steps.
      with summary.record_summaries_every_n_global_steps(
          FLAGS.steps_per_eval):
        # Both a linear-scale and a log-scale histogram are emitted for
        # every activation/gradient tensor, indexed by BN layer position.
        index = 0
        for activ in bi_list:
          normal_histogram(activ, 'bn-input-' + str(index))
          log_histogram(activ, 'bn-input-' + str(index))
          index = index + 1
        index = 0
        for activ in bo_list:
          normal_histogram(activ, 'bn-output-' + str(index))
          log_histogram(activ, 'bn-output-' + str(index))
          index = index + 1
        index = 0
        for activ in big_list:
          normal_histogram(activ, 'bn-input-grad-' + str(index))
          log_histogram(activ, 'bn-input-grad-' + str(index))
          index = index + 1
        index = 0
        for activ in bog_list:
          normal_histogram(activ, 'bn-output-grad-' + str(index))
          log_histogram(activ, 'bn-output-grad-' + str(index))
          index = index + 1
      return summary.all_summary_ops()
def host_call_fn(**kwargs):
  """Writes the mean of every named tensor as a scalar summary.

  Args:
    **kwargs: mapping from summary name to the tensor to reduce and log.

  Returns:
    List of summary ops to run on the CPU host.
  """
  writer = contrib_summary.create_file_writer(summary_dir, max_queue=1000)
  with writer.as_default(), contrib_summary.always_record_summaries():
    for name, scalar in kwargs.items():
      contrib_summary.scalar(name, tf.reduce_mean(scalar))
    return contrib_summary.all_summary_ops()
def eval_metrics_host_call_fn(policy_output, value_output, pi_tensor, policy_cost, value_cost, l2_cost, combined_cost, step, est_mode=tf.estimator.ModeKeys.TRAIN): policy_entropy = -tf.reduce_mean(tf.reduce_sum( policy_output * tf.log(policy_output), axis=1)) # pi_tensor is one_hot when generated from sgfs (for supervised learning) # and soft-max when using self-play records. argmax normalizes the two. policy_target_top_1 = tf.argmax(pi_tensor, axis=1) policy_output_in_top1 = tf.to_float( tf.nn.in_top_k(policy_output, policy_target_top_1, k=1)) policy_output_in_top3 = tf.to_float( tf.nn.in_top_k(policy_output, policy_target_top_1, k=3)) policy_top_1_confidence = tf.reduce_max(policy_output, axis=1) policy_target_top_1_confidence = tf.boolean_mask( policy_output, tf.one_hot(policy_target_top_1, tf.shape(policy_output)[1])) with tf.variable_scope("metrics"): metric_ops = { 'policy_cost': tf.metrics.mean(policy_cost), 'value_cost': tf.metrics.mean(value_cost), 'l2_cost': tf.metrics.mean(l2_cost), 'policy_entropy': tf.metrics.mean(policy_entropy), 'combined_cost': tf.metrics.mean(combined_cost), 'policy_accuracy_top_1': tf.metrics.mean(policy_output_in_top1), 'policy_accuracy_top_3': tf.metrics.mean(policy_output_in_top3), 'policy_top_1_confidence': tf.metrics.mean(policy_top_1_confidence), 'policy_target_top_1_confidence': tf.metrics.mean( policy_target_top_1_confidence), 'value_confidence': tf.metrics.mean(tf.abs(value_output)), } if est_mode == tf.estimator.ModeKeys.EVAL: return metric_ops # NOTE: global_step is rounded to a multiple of FLAGS.summary_steps. 
eval_step = tf.reduce_min(step) # Create summary ops so that they show up in SUMMARIES collection # That way, they get logged automatically during training summary_writer = summary.create_file_writer(FLAGS.work_dir) with summary_writer.as_default(), \ summary.record_summaries_every_n_global_steps( params['summary_steps'], eval_step): for metric_name, metric_op in metric_ops.items(): summary.scalar(metric_name, metric_op[1], step=eval_step) # Reset metrics occasionally so that they are mean of recent batches. reset_op = tf.variables_initializer(tf.local_variables("metrics")) cond_reset_op = tf.cond( tf.equal(eval_step % params['summary_steps'], tf.to_int64(1)), lambda: reset_op, lambda: tf.no_op()) return summary.all_summary_ops() + [cond_reset_op]
def host_call_fn(gs, lpl, dcl, ls):
  """Writes label-prediction, domain-classification, and total loss summaries.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    lpl: `Tensor` with shape `[batch]` for the label prediction loss.
    dcl: `Tensor` with shape `[batch]` for the domain classification loss.
    ls: `Tensor` with shape `[batch]` for the total loss.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  writer = summary.create_file_writer(
      params['model_dir'], max_queue=params['save_checkpoints_steps'])
  with writer.as_default():
    with summary.always_record_summaries():
      summary.scalar('label_prediction_loss', lpl[0], step=step)
      summary.scalar('domain_classification_loss', dcl[0], step=step)
      summary.scalar('loss', ls[0], step=step)
      return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, ce):
  """Writes loss, learning-rate, and epoch scalar summaries on the host.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.
    ce: `Tensor` with shape `[batch]` for the current_epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  with summary.create_file_writer(FLAGS.model_dir).as_default():
    with summary.always_record_summaries():
      for tag, tensor in (('loss', loss), ('learning_rate', lr),
                          ('current_epoch', ce)):
        summary.scalar(tag, tensor[0], step=step)
      return summary.all_summary_ops()
def host_call_fn(gs, *summary_tensors):
  """Logs the mean of each named summary tensor as a scalar.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    *summary_tensors: tensors paired positionally with `summary_names`.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  writer = contrib_summary.create_file_writer(FLAGS.workdir)
  with writer.as_default():
    with contrib_summary.always_record_summaries():
      for name, reshaped_tensor in zip(summary_names, summary_tensors):
        contrib_summary.scalar(
            name, tf.reduce_mean(reshaped_tensor), step=step)
      return contrib_summary.all_summary_ops()
def host_call_fn(gs, lr):
  """Logs the mean learning rate against the mean global step.

  Args:
    gs: per-replica global-step tensor (int32 from the outfeed).
    lr: per-replica learning-rate tensor.

  Returns:
    List of summary ops to run on the CPU host.
  """
  # Outfeed supports int32 but global_step is expected to be int64.
  step = tf.cast(tf.reduce_mean(gs), tf.int64)
  with summary.create_file_writer(self.model_dir).as_default():
    with summary.always_record_summaries():
      summary.scalar('learning_rate', tf.reduce_mean(lr), step=step)
      return summary.all_summary_ops()
def host_call_fn(global_step, *tensors):
  """Training host call: logs each named tensor's first element."""
  step = global_step[0]
  # Metrics land in a dedicated '/metrics' subdirectory of the summary dir.
  writer = contrib_summary.create_file_writer(summary_dir + '/metrics')
  with writer.as_default():
    with contrib_summary.always_record_summaries():
      for name, tensor in zip(names, tensors):
        contrib_summary.scalar(name, tensor[0], step=step)
      return contrib_summary.all_summary_ops()
def host_call_fn(global_step, *tensors):
  """Training host call: logs non-image tensors every `log_every` steps."""
  step = global_step[0]
  writer = contrib_summary.create_file_writer(params.output_dir)
  with writer.as_default():
    # Throttle recording to once every params.log_every global steps.
    with contrib_summary.record_summaries_every_n_global_steps(
        n=params.log_every, global_step=step):
      for name, tensor in zip(names, tensors):
        # Image tensors are skipped; only scalar-style entries are logged.
        if 'images' in name:
          continue
        contrib_summary.scalar(name, tensor[0], step=step)
      return contrib_summary.all_summary_ops()
def host_call_fn(gs, scalar_values):
  """Returns summary ops for the packed scalar values."""
  step = gs[0]
  # scalar_values arrives as one stacked tensor; unpack to individual
  # scalars matching the order of tensorboard_scalars' keys.
  values = tf.unstack(scalar_values)
  writer = summary.create_file_writer(
      FLAGS.model_dir, max_queue=FLAGS.iterations_per_loop)
  with writer.as_default():
    with summary.always_record_summaries():
      for key, value in zip(tensorboard_scalars.keys(), values):
        tf.contrib.summary.scalar(key, value, step=step)
      return summary.all_summary_ops()
def host_call_fn(gs, loss, lr, mix=None, gt_sources=None, est_sources=None):
  """Training host call. Creates scalar summaries for training metrics.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments should match the list of `Tensor`
  objects passed as the second element in the tuple passed to `host_call`.

  Args:
    gs: `Tensor with shape `[batch]` for the global_step
    loss: `Tensor` with shape `[batch]` for the training loss.
    lr: `Tensor` with shape `[batch]` for the learning_rate.
    mix: `Tensor` with shape `[batch, mix_samples, 1]`
    gt_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`
    est_sources: `Tensor` with shape `[batch, sources_n, output_samples, 1]`

  Returns:
    List of summary ops to run on the CPU host.
  """
  gs = gs[0]
  with summary.create_file_writer(
      model_config["model_base_dir"] + os.path.sep +
      str(model_config["experiment_id"])).as_default():
    with summary.always_record_summaries():
      summary.scalar('loss', loss[0], step=gs)
      summary.scalar('learning_rate', lr[0], step=gs)
      # NOTE(review): `gs` is a Tensor here, so `if gs % 10000 == 0`
      # evaluates a Tensor in Python — in TF1 graph mode this raises
      # rather than gating per-step; the inner
      # record_summaries_every_n_global_steps already throttles audio
      # summaries. Confirm the intended gating behavior.
      if gs % 10000 == 0:
        with summary.record_summaries_every_n_global_steps(
            model_config["audio_summaries_every_n_steps"]):
          summary.audio('mix', mix, model_config['expected_sr'],
                        max_outputs=model_config["num_sources"])
          # One audio summary per separated source channel.
          for source_id in range(gt_sources.shape[1].value):
            summary.audio('gt_sources_{source_id}'.format(
                source_id=source_id),
                gt_sources[:, source_id, :, :],
                model_config['expected_sr'],
                max_outputs=model_config["num_sources"])
            summary.audio('est_sources_{source_id}'.format(
                source_id=source_id),
                est_sources[:, source_id, :, :],
                model_config['expected_sr'],
                max_outputs=model_config["num_sources"])
      return summary.all_summary_ops()
def host_call_fn(*args):
  """Host call function to compute training summaries."""
  # Rebuild the name->tensor mapping, then keep only the first element of
  # each per-replica tensor.
  scalars = _list_to_dicts(args, scalars_to_summarize.keys())[0]
  scalars = {name: tensor[0] for name, tensor in scalars.items()}
  writer = contrib_summary.create_file_writer(summary_dir, max_queue=1000)
  with writer.as_default():
    with contrib_summary.always_record_summaries():
      step = scalars["global_step"]
      for name, value in scalars.items():
        # global_step is the x-axis, not a metric — don't log it.
        if name == "global_step":
          continue
        contrib_summary.scalar(name, value, step=step)
      return contrib_summary.all_summary_ops()
def host_call_fn(gs, lr):
  """Training host call. Creates a learning-rate scalar summary.

  Args:
    gs: `Tensor` with shape `[batch]` for the global_step.
    lr: `Tensor` with shape `[batch]` for the learning_rate.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = gs[0]
  writer = summary.create_file_writer(params['model_dir'])
  with writer.as_default():
    with summary.always_record_summaries():
      summary.scalar('learning_rate', lr[0], step=step)
      return summary.all_summary_ops()
def host_call_fn(model_dir, **kwargs):
  """host_call function used for creating training summaries when using TPU.

  Args:
    model_dir: String indicating the output_dir to save summaries in.
    **kwargs: Set of metric names and tensor values for all desired
      summaries; must include a 'global_step' entry.

  Returns:
    Summary op to be passed to the host_call arg of the estimator function.
  """
  gs = kwargs.pop('global_step')[0]
  with summary.create_file_writer(model_dir).as_default():
    with summary.always_record_summaries():
      # BUGFIX: dict.iteritems() is Python-2-only and raises
      # AttributeError on Python 3; items() works on both.
      for name, tensor in kwargs.items():
        summary.scalar(name, tensor[0], step=gs)
      return summary.all_summary_ops()
def summary_fn(G0, G1, G2, R0, R1, R2, L_D0, L_D1, L_D2, L_D0_W, L_D1_W,
               L_D2_W, L_G0, L_G1, L_G2, L_G, D0_global_step, D1_global_step,
               D2_global_step, G_global_step):
  """Writes image and loss summaries for the multi-scale GAN training run.

  Real images (R*) are stepped by their discriminator's global step;
  generated images (G*) and generator losses by the generator's step.

  Returns:
    List of summary ops to run on the CPU host.
  """
  with summary.create_file_writer(config.log_dir).as_default():
    with summary.always_record_summaries():
      max_image_outputs = 10
      # Strip TPU padding from the step counters and scalar losses.
      d_steps = [tpu_depad(s) for s in
                 (D0_global_step, D1_global_step, D2_global_step)]
      g_step = tpu_depad(G_global_step)
      d_losses = [tpu_depad(l) for l in (L_D0, L_D1, L_D2)]
      d_losses_w = [tpu_depad(l) for l in (L_D0_W, L_D1_W, L_D2_W)]
      g_losses = [tpu_depad(l) for l in (L_G0, L_G1, L_G2)]
      g_total = tpu_depad(L_G)
      for tag, image, step in (
          ('R0', R0, d_steps[0]), ('R1', R1, d_steps[1]),
          ('R2', R2, d_steps[2]), ('G0', G0, g_step),
          ('G1', G1, g_step), ('G2', G2, g_step)):
        summary.image(tag, image, max_images=max_image_outputs, step=step)
      with tf.name_scope('losses'):
        for tag, loss, step in (
            ('D0', d_losses[0], d_steps[0]),
            ('D1', d_losses[1], d_steps[1]),
            ('D2', d_losses[2], d_steps[2]),
            ('D0_W', d_losses_w[0], d_steps[0]),
            ('D1_W', d_losses_w[1], d_steps[1]),
            ('D2_W', d_losses_w[2], d_steps[2]),
            ('G0', g_losses[0], g_step),
            ('G1', g_losses[1], g_step),
            ('G2', g_losses[2], g_step),
            ('G', g_total, g_step)):
          summary.scalar(tag, loss, step=step)
      return summary.all_summary_ops()
def host_call_fn(global_step, total_loss, cross_entropy, regularization_loss,
                 learning_rate, train_accuracy):
  """Writes the training loss components, LR, and accuracy as summaries.

  Args:
    global_step: `Tensor` with shape `[batch]` for the global_step.
    total_loss: `Tensor` with shape `[batch]` for the full training loss.
    cross_entropy: `Tensor` with shape `[batch]` for the CE term.
    regularization_loss: `Tensor` with shape `[batch]` for the reg term.
    learning_rate: `Tensor` with shape `[batch]` for the learning rate.
    train_accuracy: `Tensor` with shape `[batch]` for training accuracy.

  Returns:
    List of summary ops to run on the CPU host.
  """
  step = global_step[0]
  writer = summary.create_file_writer(
      params['model_dir'], max_queue=params['iterations_per_loop'])
  with writer.as_default():
    with summary.always_record_summaries():
      for tag, tensor in (('entire_loss', total_loss),
                          ('cross_entropy_loss', cross_entropy),
                          ('regularization_loss', regularization_loss),
                          ('learning_rate', learning_rate),
                          ('train_accuracy', train_accuracy)):
        summary.scalar(tag, tensor[0], step=step)
      return summary.all_summary_ops()
def host_call_fn(global_step, loss, learning_rate, current_epoch):
  """Training host call: writes batch-mean metrics as scalar summaries.

  Executed on the CPU host; must not reference Tensors from the rest of
  the `model_fn` directly. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments match the `Tensor` list passed as the
  second element of the `host_call` tuple.

  Args:
    global_step: `Tensor` with shape `[batch, ]` for the global_step.
    loss: `Tensor` with shape `[batch, ]` for the training loss.
    learning_rate: `Tensor` with shape `[batch, ]` for the learning_rate.
    current_epoch: `Tensor` with shape `[batch, ]` for the current_epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  # Outfeed supports int32 but global_step is expected to be int64.
  step = tf.reduce_mean(global_step)
  writer = contrib_summary.create_file_writer(params['model_dir'])
  with writer.as_default():
    with contrib_summary.always_record_summaries():
      for tag, tensor in (('loss', loss),
                          ('learning_rate', learning_rate),
                          ('current_epoch', current_epoch)):
        contrib_summary.scalar(tag, tf.reduce_mean(tensor), step=step)
      return contrib_summary.all_summary_ops()
# Register histogram/image summaries for the SfM model's intermediate
# outputs, then run the training loop. NOTE(review): this fragment relies
# on names (obj_p, flow, f0, f1, f1_t, depth, cast_im, cast_depth,
# cast_flow, obj_summary, cam_summary, model, loss, optimize, models_path,
# S_max) defined elsewhere in the file.
summary.histogram("obj masks", obj_p[0])
summary.histogram("flow_x_hist", flow[:, :, :, 0], family="flow")
summary.histogram("flow_y_hist", flow[:, :, :, 1], family="flow")
summary.image("frame0", cast_im(f0), max_images=3)
summary.image("frame1", cast_im(f1), max_images=3)
summary.image("frame1_t", cast_im(f1_t), max_images=3)
summary.image("depth", cast_depth(depth), max_images=3)
summary.image("optical_flow", cast_flow(flow), max_images=3)
summary.image("object masks", cast_im(obj_p[0]), max_images=3)
obj_summary(obj_p)
cam_summary(cam_p)
with tf.Session() as sess:
  tf.global_variables_initializer().run()
  summary.initialize(graph=tf.get_default_graph())
  # Resume from the previously saved weights.
  model.load_weights(os.path.join(models_path, "sfm.h5"))
  for s in range(S_max):
    # Run one optimization step together with all summary ops; only the
    # loss value is kept on the Python side.
    l, *_ = sess.run(
        [loss, optimize, summary.all_summary_ops()])
    # beholder.update(session=sess)
    if s % 50 == 0:
      print("Iteration: {} Loss: {}".format(s, l))
    # Periodic checkpoint, skipping step 0.
    if s % 5000 == 0 and not s == 0:
      model.save_weights(os.path.join(models_path, "sfm.h5"))
def tb_summary_fn_tpu(global_step, loss, learning_rate, current_epoch):
  """Training host call. Creates scalar summaries for training metrics.

  This function is executed on the CPU and should not directly reference
  any Tensors in the rest of the `model_fn`. To pass Tensors from the
  model to the `metric_fn`, provide as part of the `host_call`. See
  https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
  for more information. Arguments should match the list of `Tensor`
  objects passed as the second element in the tuple passed to `host_call`.

  Args:
    global_step: `Tensor` with shape `[batch]` for the global_step.
    loss: `Tensor` with shape `[batch]` for the training loss.
    learning_rate: `Tensor` with shape `[batch]` for the learning_rate.
    current_epoch: `Tensor` with shape `[batch]` for the current_epoch.

  Returns:
    List of summary ops to run on the CPU host.
  """
  # Cleanup: the large blocks of commented-out mid-loss summary code were
  # removed; behavior is unchanged.
  with tf.name_scope(
      name='tb_summary',
      values=[global_step, loss, learning_rate, current_epoch]):
    global_step = global_step[0]
    # Each invocation logs to a fresh, timestamped run directory under
    # TENSORBOARD_BUCKET so separate runs don't clobber each other.
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    tb_logdir_path = TENSORBOARD_BUCKET
    tb_logdir = "{}/run-{}/".format(tb_logdir_path, now)
    tf.logging.info('[model_fn] tf summary at %s' % tb_logdir)
    if not tf.gfile.Exists(tb_logdir_path):
      tf.gfile.MakeDirs(tb_logdir_path)
    with summary.create_file_writer(logdir=tb_logdir).as_default():
      with summary.always_record_summaries():
        summary.scalar('loss', loss[0], step=global_step)
        summary.scalar('learning_rate', learning_rate[0], step=global_step)
        summary.scalar('current_epoch', current_epoch[0],
                       step=global_step)
        return summary.all_summary_ops()