def display_video_hooks(hook_args):
  """Hooks to display videos at decode time.

  For every decode, builds two gif summaries (conditioning frames followed
  by ground-truth frames, and conditioning frames followed by model
  outputs) plus per-sample frame-by-frame image summaries.

  Args:
    hook_args: decode-hook arguments; must expose `predictions` (an
      iterable of per-decode dicts) and `decode_hparams` (with
      `frames_per_second` and `border_percent`).

  Returns:
    A list of tf Summary values.
  """
  predictions = hook_args.predictions
  fps = hook_args.decode_hparams.frames_per_second
  border_percent = hook_args.decode_hparams.border_percent
  # Hoisted magic number: controls both the number of videos shown in the
  # gif summaries and the number of samples given frame-by-frame summaries,
  # so the two can never drift apart.
  max_outputs = 10

  all_summaries = []
  for decode_ind, decode in enumerate(predictions):
    target_videos = video_metrics.stack_data_given_key(decode, "targets")
    output_videos = video_metrics.stack_data_given_key(decode, "outputs")
    input_videos = video_metrics.stack_data_given_key(decode, "inputs")
    target_videos = np.asarray(target_videos, dtype=np.uint8)
    output_videos = np.asarray(output_videos, dtype=np.uint8)
    input_videos = np.asarray(input_videos, dtype=np.uint8)

    # Colored borders distinguish conditioning frames (blue) from
    # ground-truth / generated frames (red).
    input_videos = create_border(
        input_videos, color="blue", border_percent=border_percent)
    target_videos = create_border(
        target_videos, color="red", border_percent=border_percent)
    output_videos = create_border(
        output_videos, color="red", border_percent=border_percent)

    # Video gif: concatenate along the time axis (axis=1 of NTHWC) so the
    # conditioning frames play first, then the true / predicted frames.
    all_input = np.concatenate((input_videos, target_videos), axis=1)
    all_output = np.concatenate((input_videos, output_videos), axis=1)
    input_summ_vals, _ = common_video.py_gif_summary(
        "decode_%d/input" % decode_ind, all_input, max_outputs=max_outputs,
        fps=fps, return_summary_value=True)
    output_summ_vals, _ = common_video.py_gif_summary(
        "decode_%d/output" % decode_ind, all_output, max_outputs=max_outputs,
        fps=fps, return_summary_value=True)
    all_summaries.extend(input_summ_vals)
    all_summaries.extend(output_summ_vals)

    # Frame-by-frame summaries.
    iterable = zip(all_input[:max_outputs], all_output[:max_outputs])
    for ind, (input_video, output_video) in enumerate(iterable):
      t, h, w, c = input_video.shape
      # Tile each video's frames vertically, then put input and output
      # side by side across the width.
      input_frames = np.reshape(input_video, (t * h, w, c))
      output_frames = np.reshape(output_video, (t * h, w, c))
      all_frames = np.concatenate((input_frames, output_frames), axis=1)
      tag = "input/output/decode_%d_sample_%d" % (decode_ind, ind)
      frame_by_frame_summ = image_utils.image_to_tf_summary_value(
          all_frames, tag=tag)
      all_summaries.append(frame_by_frame_summ)
  return all_summaries
def convert_videos_to_summaries(input_videos, output_videos, target_videos,
                                tag, decode_hparams):
  """Converts input, output and target videos into video summaries.

  Args:
    input_videos: 5-D NumPy array, (NTHWC) conditioning frames.
    output_videos: 5-D NumPy array, (NTHWC) model-generated frames.
    target_videos: 5-D NumPy array, (NTHWC) ground-truth frames.
      (NOTE(review): the original docstring labeled outputs as "ground
      truth" and targets as "target frames"; relabeled to match how the
      surrounding code pairs them — confirm against callers.)
    tag: tf summary tag.
    decode_hparams: tf.contrib.training.HParams.

  Returns:
    summaries: a list of tf frame-by-frame and video summaries.
  """
  fps = decode_hparams.frames_per_second
  border_percent = decode_hparams.border_percent
  max_outputs = decode_hparams.max_display_outputs
  all_summaries = []

  # Colored borders distinguish conditioning frames (blue) from
  # ground-truth / generated frames (red).
  input_videos = create_border(
      input_videos, color="blue", border_percent=border_percent)
  target_videos = create_border(
      target_videos, color="red", border_percent=border_percent)
  output_videos = create_border(
      output_videos, color="red", border_percent=border_percent)

  # Video gif: concatenate along the time axis (axis=1 of NTHWC).
  all_input = np.concatenate((input_videos, target_videos), axis=1)
  all_output = np.concatenate((input_videos, output_videos), axis=1)
  input_summ_vals, _ = common_video.py_gif_summary(
      "%s/input" % tag, all_input, max_outputs=max_outputs,
      fps=fps, return_summary_value=True)
  output_summ_vals, _ = common_video.py_gif_summary(
      "%s/output" % tag, all_output, max_outputs=max_outputs,
      fps=fps, return_summary_value=True)
  all_summaries.extend(input_summ_vals)
  all_summaries.extend(output_summ_vals)

  # Frame-by-frame summaries.
  iterable = zip(all_input[:max_outputs], all_output[:max_outputs])
  for ind, (input_video, output_video) in enumerate(iterable):
    t, h, w, c = input_video.shape
    # Tile each video's frames vertically, then put input and output
    # side by side across the width.
    input_frames = np.reshape(input_video, (t * h, w, c))
    output_frames = np.reshape(output_video, (t * h, w, c))
    all_frames = np.concatenate((input_frames, output_frames), axis=1)
    # BUG FIX: use a fresh name for the per-frame tag. The original
    # re-bound `tag` itself, so every iteration compounded the previous
    # iteration's tag ("input/output/input/output/..._sample_0_sample_1").
    frame_tag = "input/output/%s_sample_%d" % (tag, ind)
    frame_by_frame_summ = image_utils.image_to_tf_summary_value(
        all_frames, tag=frame_tag)
    all_summaries.append(frame_by_frame_summ)
  return all_summaries
def interpolations_to_summary(sample_ind, interpolations, first_frame,
                              last_frame, hparams, decode_hp):
  """Converts interpolated frames into tf summaries.

  The summaries consist of:
    1. Image summary corresponding to the first frame.
    2. Image summary corresponding to the last frame.
    3. The interpolated frames as a gif summary.

  Args:
    sample_ind: int
    interpolations: Numpy array, shape=(num_interp, H, W, 3)
    first_frame: Numpy array, shape=(HWC)
    last_frame: Numpy array, shape=(HWC)
    hparams: HParams, train hparams
    decode_hp: HParams, decode hparams

  Returns:
    summaries: list of tf Summary Values.
  """
  parent_tag = "sample_%d" % sample_ind

  # Restore the (batch, num_interp, H, W, C) layout before summarizing.
  target_shape = (
      [hparams.batch_size, decode_hp.num_interp] + hparams.problem.frame_shape)
  interpolations = np.reshape(interpolations, target_shape)

  # Tag encodes the interpolation mode; ranked interpolation also records
  # which rank was used.
  interp_tag = "%s/interp/%s" % (parent_tag, decode_hp.channel_interp)
  if decode_hp.channel_interp == "ranked":
    interp_tag += "/rank_%d" % decode_hp.rank_interp

  summaries, _ = common_video.py_gif_summary(
      interp_tag, interpolations, return_summary_value=True,
      max_outputs=decode_hp.max_display_outputs,
      fps=decode_hp.frames_per_second)

  if decode_hp.save_frames:
    summaries.append(image_utils.image_to_tf_summary_value(
        first_frame, "%s/first" % parent_tag))
    summaries.append(image_utils.image_to_tf_summary_value(
        last_frame, "%s/last" % parent_tag))
  return summaries
def interpolations_to_summary(sample_ind, interpolations, hparams,
                              decode_hparams):
  """Converts interpolated frames into tf summaries.

  The summaries consist of:
    1. Image summary corresponding to the first frame.
    2. Image summary corresponding to the last frame.
    3. The interpolated frames as a gif summary.

  Args:
    sample_ind: int
    interpolations: Numpy array, shape=(num_interp, 64, 64, 3)
    hparams: tf.contrib.training.HParams, train hparams
    decode_hparams: tf.contrib.training.HParams, decode hparams

  Returns:
    summaries: list of tf Summary Values.
  """
  parent_tag = "sample_%d" % sample_ind

  # Restore the (batch, num_interp, H, W, C) layout before summarizing.
  # NOTE(review): num_interp comes from FLAGS here, not decode_hparams —
  # inconsistent with the sibling version of this function; confirm which
  # is authoritative before unifying.
  target_shape = (
      [hparams.batch_size, FLAGS.num_interp] + hparams.problem.frame_shape)
  interpolations = np.reshape(interpolations, target_shape)

  summaries, _ = common_video.py_gif_summary(
      parent_tag, interpolations, return_summary_value=True,
      max_outputs=decode_hparams.max_display_outputs,
      fps=decode_hparams.frames_per_second)

  # Bookend image summaries taken from the first batch element.
  first_frame = interpolations[0, 0]
  last_frame = interpolations[0, -1]
  summaries.append(image_utils.image_to_tf_summary_value(
      first_frame, "%s/first" % parent_tag))
  summaries.append(image_utils.image_to_tf_summary_value(
      last_frame, "%s/last" % parent_tag))
  return summaries
def testImageToSummaryValue(self):
  """Checks that image_to_tf_summary_value propagates the given tag."""
  random_state = np.random.RandomState(0)
  image = random_state.randint(0, 255, (32, 32, 3))
  summary_value = image_utils.image_to_tf_summary_value(image, "X_image")
  self.assertEqual(summary_value.tag, "X_image")