def testTemporalSeparationLossDifferentMovement(self):
  """Temporal separation loss should be low for nonparallel movement."""
  # Create trajectories in which all keypoints move differently:
  coords = self._create_parallel_coords()
  coords[:, 0, :] = -coords[:, 0, :]  # Reverse the direction of keypoint 0.
  coords[:, 1, :] = 0.0  # Keep keypoint 1 stationary.
  with self.session() as sess:
    coords = tf.convert_to_tensor(coords)
    loss = sess.run(losses.temporal_separation_loss(self.cfg, coords))
  np.testing.assert_almost_equal(loss, 0.0, decimal=4)
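# The `_create_parallel_coords` fixture is not shown in this section. Below is
# a minimal sketch of what it could look like, assuming a
# [num_timesteps, num_keypoints, 2] coordinate layout (matching the indexing
# in the test above) and exactly two keypoints sharing one random trajectory.
# The actual helper may use a different shape or keypoint count.
def _create_parallel_coords_sketch(num_timesteps=8, num_keypoints=2):
  """Hypothetical fixture: all keypoints follow the same trajectory."""
  trajectory = np.random.RandomState(0).uniform(
      low=-1.0, high=1.0, size=(num_timesteps, 1, 2))
  # Tile one trajectory across all keypoints so they move in parallel:
  return np.tile(trajectory, [1, num_keypoints, 1]).astype(np.float32)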
def build_model(cfg, data_shapes):
  """Builds the complete model with image encoder plus dynamics model.

  This architecture is meant for testing/illustration only.

  Model architecture:

    image_sequence --> keypoints --> reconstructed_image_sequence
                          |
                          V
                   dynamics_model --> predicted_keypoints

  The model takes a [batch_size, timesteps, H, W, C] image sequence as input.
  It "observes" all frames, detects keypoints, and reconstructs the images.
  The dynamics model learns to predict future keypoints based on the detected
  keypoints.

  Args:
    cfg: ConfigDict with model hyperparameters.
    data_shapes: Dict of shapes of model input tensors, as returned by
      datasets.get_sequence_dataset.

  Returns:
    tf.keras.Model object.
  """
  input_shape_no_batch = data_shapes['image'][
      1:]  # Keras uses shape w/o batch.
  input_images = tf.keras.Input(shape=input_shape_no_batch, name='image')

  # Vision model:
  observed_keypoints, _ = vision.build_images_to_keypoints_net(
      cfg, input_shape_no_batch)(input_images)
  keypoints_to_images_net = vision.build_keypoints_to_images_net(
      cfg, input_shape_no_batch)
  reconstructed_images = keypoints_to_images_net([
      observed_keypoints, input_images[:, 0, Ellipsis],
      observed_keypoints[:, 0, Ellipsis]
  ])

  # Dynamics model:
  observed_keypoints_stop = tf.keras.layers.Lambda(
      tf.stop_gradient)(observed_keypoints)
  dynamics_model = dynamics.build_vrnn(cfg)
  predicted_keypoints, kl_divergence = dynamics_model(observed_keypoints_stop)

  model = tf.keras.Model(
      inputs=[input_images],
      outputs=[reconstructed_images, observed_keypoints, predicted_keypoints],
      name='autoencoder')

  # Losses:
  image_loss = tf.nn.l2_loss(input_images - reconstructed_images)
  # Normalize by batch size and sequence length:
  image_loss /= tf.to_float(
      tf.shape(input_images)[0] * tf.shape(input_images)[1])
  model.add_loss(image_loss)

  separation_loss = losses.temporal_separation_loss(
      cfg, observed_keypoints[:, :cfg.observed_steps, Ellipsis])
  model.add_loss(cfg.separation_loss_scale * separation_loss)

  vrnn_coord_pred_loss = tf.nn.l2_loss(
      observed_keypoints_stop - predicted_keypoints)
  # Normalize by batch size and sequence length:
  vrnn_coord_pred_loss /= tf.to_float(
      tf.shape(input_images)[0] * tf.shape(input_images)[1])
  model.add_loss(vrnn_coord_pred_loss)

  kl_loss = tf.reduce_mean(kl_divergence)  # Mean over batch and timesteps.
  model.add_loss(cfg.kl_loss_scale * kl_loss)

  return model
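# A hypothetical end-to-end usage sketch for build_model. The config source,
# the image shape, and the learning rate are assumptions for illustration, not
# confirmed defaults of this repo; the TF 1.x style matches tf.to_float above.
def _example_training_sketch():
  """Hypothetical usage of build_model (cfg fields/shapes are assumptions)."""
  cfg = hyperparameters.get_config()  # Assumed ConfigDict source.
  data_shapes = {'image': (None, 8, 64, 64, 3)}  # [batch, T, H, W, C].
  model = build_model(cfg, data_shapes)
  # All losses were attached via model.add_loss, so compile needs no
  # per-output loss:
  model.compile(optimizer=tf.keras.optimizers.Adam(1e-3))
  images = np.zeros((4, 8, 64, 64, 3), dtype=np.float32)
  model.fit(images, epochs=1)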
def testTemporalSeparationLossParallelMovement(self):
  """Temporal separation loss should be high for parallel-moving keypoints."""
  with self.session() as sess:
    coords = tf.convert_to_tensor(self._create_parallel_coords())
    loss = sess.run(losses.temporal_separation_loss(self.cfg, coords))
  np.testing.assert_almost_equal(loss, 1.0, decimal=4)
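# For reference, a minimal reimplementation of a temporal separation loss that
# is consistent with the two tests above: Gaussian similarity between
# time-centered keypoint trajectories, averaged over off-diagonal keypoint
# pairs. This is an illustrative sketch only; the actual
# losses.temporal_separation_loss may differ in shape conventions,
# normalization, and hyperparameter names.
def _temporal_separation_loss_sketch(sigma, coords):
  """Hypothetical loss. coords: [num_timesteps, num_keypoints, 2]."""
  # Center each trajectory over time so only relative motion matters:
  centered = coords - tf.reduce_mean(coords, axis=0, keepdims=True)
  # Pairwise squared trajectory distance, summed over time and x/y:
  # distances[i, j] = sum_t ||centered[t, i] - centered[t, j]||^2
  diff = centered[:, :, tf.newaxis, :] - centered[:, tf.newaxis, :, :]
  distances = tf.reduce_sum(tf.square(diff), axis=[0, 3])
  similarity = tf.exp(-distances / (2.0 * sigma ** 2))
  # Average over off-diagonal pairs: 1.0 for parallel motion (distance 0),
  # near 0.0 for trajectories that differ by much more than sigma:
  num_keypoints = tf.cast(tf.shape(coords)[1], tf.float32)
  return (tf.reduce_sum(similarity) - num_keypoints) / (
      num_keypoints * (num_keypoints - 1.0))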