def _get_discriminator_logits(learner_agent_output, env_output,
                              actor_agent_output, actor_action,
                              reward_clipping, discounting, baseline_cost,
                              entropy_cost, num_steps):
    """Returns the masked discriminator labels and logits, logging summaries.

    The signature mirrors the loss-function interface; the actor outputs and
    the RL hyper-parameters are accepted but not used.

    Args:
      learner_agent_output: Agent output. `policy_logits` holds the
        discriminator logits and `baseline` holds the pair of affine
        parameters `(a, b)`, which are only logged here.
      env_output: Environment output whose observation provides the
        'disc_mask' and 'label' entries.
      actor_agent_output: Unused.
      actor_action: Unused.
      reward_clipping: Unused.
      discounting: Unused.
      baseline_cost: Unused.
      entropy_cost: Unused.
      num_steps: Step value attached to every summary written here.

    Returns:
      A `(labels, logits)` tuple, both restricted to the entries selected by
      `utils.get_first_true_column` applied to the 'disc_mask' observation.
    """
    # Only the learner output, the env output and the step are needed.
    del actor_agent_output, actor_action, reward_clipping
    del discounting, baseline_cost, entropy_cost

    def _log_histograms(sigmoid_tag, raw_tag, values):
        # Logs the sigmoid-squashed values first, then the raw values.
        tf.summary.histogram(sigmoid_tag, tf.sigmoid(values), step=num_steps)
        tf.summary.histogram(raw_tag, values, step=num_steps)

    selection = utils.get_first_true_column(
        env_output.observation['disc_mask'])

    # Per the original annotations: [time, batch] before masking, [batch]
    # after keeping the selected entries.
    selected_logits = tf.boolean_mask(
        learner_agent_output.policy_logits, selection)
    affine_a, affine_b = learner_agent_output.baseline

    all_labels = tf.cast(env_output.observation['label'], tf.float32)
    tf.summary.scalar(
        'labels/mean_labels before masking',
        tf.reduce_mean(all_labels),
        step=num_steps)
    selected_labels = tf.boolean_mask(all_labels, selection)

    is_positive = tf.equal(selected_labels, tf.constant(1.0))
    _log_histograms(
        'distribution/sigmoid_positive_logits',
        'distribution/positive_logits',
        tf.boolean_mask(selected_logits, is_positive))

    is_negative = tf.equal(selected_labels, tf.constant(0.0))
    _log_histograms(
        'distribution/sigmoid_negative_logits',
        'distribution/negative_logits',
        tf.boolean_mask(selected_logits, is_negative))

    positive_fraction = tf.reduce_mean(tf.cast(is_positive, tf.float32))
    tf.summary.scalar(
        'labels/positive_label_ratio', positive_fraction, step=num_steps)
    tf.summary.scalar(
        'affine_transform/a', tf.reduce_mean(affine_a), step=num_steps)
    tf.summary.scalar(
        'affine_transform/b', tf.reduce_mean(affine_b), step=num_steps)

    return selected_labels, selected_logits
def get_discriminator_loss(learner_agent_output, env_output,
                           actor_agent_output, actor_action, reward_clipping,
                           discounting, baseline_cost, entropy_cost,
                           num_steps):
    """Computes the weighted sigmoid cross-entropy loss of the discriminator.

    The actor outputs and the RL hyper-parameters are part of the signature
    but are not used by this loss.

    Args:
      learner_agent_output: Agent output. `policy_logits` holds the
        discriminator logits (axis 1 is squeezed away before masking) and
        `baseline` holds the pair of affine parameters `(a, b)`, which are
        only logged here.
      env_output: Environment output whose observation provides the
        'disc_mask' and 'label' entries.
      actor_agent_output: Unused.
      actor_action: Unused.
      reward_clipping: Unused.
      discounting: Unused.
      baseline_cost: Unused.
      entropy_cost: Unused.
      num_steps: Step value attached to every summary written here.

    Returns:
      The element-wise (unreduced) weighted cross-entropy between the selected
      labels and logits, with positive examples weighted by 5.
    """
    # Only the learner output, the env output and the step are needed.
    del actor_agent_output, actor_action, reward_clipping
    del discounting, baseline_cost, entropy_cost

    def _log_histograms(sigmoid_tag, raw_tag, values):
        # Logs the sigmoid-squashed values first, then the raw values.
        tf.summary.histogram(sigmoid_tag, tf.sigmoid(values), step=num_steps)
        tf.summary.histogram(raw_tag, values, step=num_steps)

    selection = utils.get_first_true_column(
        env_output.observation['disc_mask'])

    squeezed_logits = tf.squeeze(learner_agent_output.policy_logits, axis=1)
    selected_logits = tf.boolean_mask(squeezed_logits, selection)
    affine_a, affine_b = learner_agent_output.baseline

    selected_labels = tf.boolean_mask(
        tf.cast(env_output.observation['label'], tf.float32), selection)

    is_positive = tf.equal(selected_labels, tf.constant(1.0))
    _log_histograms(
        'distribution/sigmoid_positive_logits',
        'distribution/positive_logits',
        tf.boolean_mask(selected_logits, is_positive))

    is_negative = tf.equal(selected_labels, tf.constant(0.0))
    _log_histograms(
        'distribution/sigmoid_negative_logits',
        'distribution/negative_logits',
        tf.boolean_mask(selected_logits, is_negative))

    tf.summary.scalar(
        'labels/positive_label',
        tf.reduce_mean(tf.cast(is_positive, tf.float32)),
        step=num_steps)
    tf.summary.scalar(
        'labels/labels', tf.reduce_mean(selected_labels), step=num_steps)
    tf.summary.scalar(
        'affine_transform/a', tf.reduce_mean(affine_a), step=num_steps)
    tf.summary.scalar(
        'affine_transform/b', tf.reduce_mean(affine_b), step=num_steps)

    # Positive examples count five times as much as negative ones.
    return tf.nn.weighted_cross_entropy_with_logits(
        labels=selected_labels, logits=selected_logits, pos_weight=5)
def _head(self, env_output, neck_outputs):
    """Builds similarity logits between visual and text features of paths.

    Args:
      env_output: Environment output; its observation provides the path ids
        (`constants.PATH_ID`) and the 'label' entry.
      neck_outputs: Dict with `constants.DISC_MASK`, 'text_state' and
        'visual_state' entries.

    Returns:
      A `common.AgentOutput` whose `policy_logits` is a dict with keys
      'similarity', 'similarity_mask' and 'labels', and whose `baseline`
      holds the affine-transformed diagonal of the similarity matrix.
    """
    num_timesteps = self._current_num_timesteps
    batch_size = self._current_batch_size

    def _batch_major(state):
        # Swaps the two leading axes (time, batch) of a rank-3 state.
        return tf.transpose(state, perm=[1, 0, 2])

    # Shape = [time, batch].
    time_major_mask = tf.reshape(
        neck_outputs[constants.DISC_MASK], [num_timesteps, batch_size])

    # The step masks are transposed to [batch, time] so that boolean_mask
    # returns the selected entries in batch order.
    first_step = tf.transpose(utils.get_first_true_column(time_major_mask))

    # Text states are a list of n_lstm_layers (h, c) pairs, made batch major.
    all_text_states = tf.nest.map_structure(
        _batch_major, neck_outputs['text_state'])
    tf.debugging.assert_equal(
        all_text_states[0][0].shape, [batch_size, num_timesteps, 512])

    # The text state is the same for every step of a path, so the one at the
    # first step is used. Selected state has shape [batch, hidden].
    text_state = self._select_by_mask(all_text_states, first_step)
    # Projected shape: [batch, hidden_dim].
    text_feature = self._get_final_projection(
        self._instruction_feature_projection, text_state)

    # Mask of the last step, i.e. the end of each sequence. Shape =
    # [batch, time] after the transpose.
    last_step = tf.transpose(utils.get_last_true_column(time_major_mask))

    # Sanity check: first and last text states of a path must agree.
    text_state_at_end = self._select_by_mask(all_text_states, last_step)
    tf.debugging.assert_equal(text_state[-1][0], text_state_at_end[-1][0])

    # Image states are a list of (h, c) pairs; each becomes
    # [batch, time_step, hidden_dim].
    all_image_states = tf.nest.map_structure(
        _batch_major, neck_outputs['visual_state'])
    if not self._average_image_states_of_all_steps:
        # Selected state has shape [batch, hidden_dim].
        image_state = self._select_by_mask(all_image_states, last_step)
    else:
        # Shape = [batch, time_step, 1].
        step_weights = tf.expand_dims(
            tf.cast(tf.transpose(time_major_mask), tf.float32), axis=2)
        # Shape of each reduced state: [batch, hidden_dim].
        # NOTE(review): the mean divides by the total number of time steps,
        # not by the number of unmasked steps - confirm this is intended.
        image_state = tf.nest.map_structure(
            lambda state: tf.reduce_mean(state * step_weights, 1),
            all_image_states)
    # Projected shape: [batch, hidden].
    visual_feature = self._get_final_projection(
        self._image_feature_projection, image_state)

    # Unit-normalize both modalities along the feature axis.
    visual_feature = tf.nn.l2_normalize(visual_feature, axis=-1)
    text_feature = tf.nn.l2_normalize(text_feature, axis=-1)

    # Path ids of the current batch. Transposed shape = [batch, time].
    all_path_ids = tf.transpose(env_output.observation[constants.PATH_ID])
    # Shape = [batch].
    path_ids = self._select_by_mask(all_path_ids, first_step)
    # The first and the last step must refer to the same path.
    tf.debugging.assert_equal(
        path_ids, self._select_by_mask(all_path_ids, last_step))

    # Labels arrive as [time, batch] and are transposed to [batch, time].
    all_labels = tf.transpose(
        tf.cast(env_output.observation['label'], tf.float32))
    # Shape = [batch].
    labels = self._select_by_mask(all_labels, first_step)
    tf.debugging.assert_equal(
        labels, self._select_by_mask(all_labels, last_step))
    # Add the time dimension required by the actor. Shape = [1, batch].
    labels = tf.expand_dims(labels, axis=0)

    # Pairwise visual/text similarity, shape [batch, batch], then with a
    # leading time dimension as required by the actor: [1, batch, batch].
    similarity = tf.expand_dims(
        tf.matmul(visual_feature, text_feature, transpose_b=True), axis=0)

    # Mask that excludes multiple positive matches of the same path.
    # Off-diagonal pairs that share a path id (broadcast column vs. row).
    same_path = tf.equal(
        tf.reshape(path_ids, [batch_size, 1]),
        tf.reshape(path_ids, [1, batch_size]))
    off_diagonal = tf.logical_not(tf.eye(batch_size, dtype=tf.bool))
    path_id_mask = tf.logical_and(same_path, off_diagonal)

    # Pairs in which both entries carry a positive (non-zero) label.
    label_mask = tf.logical_and(
        tf.cast(tf.reshape(labels, [batch_size, 1]), tf.bool),
        tf.cast(tf.reshape(labels, [1, batch_size]), tf.bool))

    # M[i, j] is False (i != j) when both path_id_mask[i, j] and
    # label_mask[i, j] are True. Shape = [1, batch, batch] with time dim.
    similarity_mask = tf.expand_dims(
        tf.logical_not(tf.logical_and(path_id_mask, label_mask)), axis=0)

    # Scale the similarity from [-1, 1] to unbounded logits.
    # Shape: [time, batch, batch].
    output_logits = {
        'similarity': self.similarity_scaler * similarity,
        'similarity_mask': similarity_mask,
        'labels': labels
    }

    # Logits for the classification loss. Shape = [time, batch].
    matching_similarity = tf.linalg.diag_part(similarity)
    classification_logits = (
        self.affine_a * matching_similarity + self.affine_b)

    return common.AgentOutput(
        policy_logits=output_logits, baseline=classification_logits)
def _head(self, neck_outputs):
    """Scores each path by attention-weighted visual/text alignment.

    Args:
      neck_outputs: Dict with `constants.PATH_ID`, `constants.DISC_MASK`,
        'visual_feature' and 'text_feature' entries, all flattened over
        [time * batch] in their leading dimension.

    Returns:
      A `common.AgentOutput` whose `policy_logits` are the affine-transformed
      similarities flattened to [time * batch], and whose `baseline` is the
      pair of affine parameters `(a, b)` tiled to the same flat shape.
    """
    num_timesteps = self._current_num_timesteps
    batch_size = self._current_batch_size
    time_batch = [num_timesteps, batch_size]

    def _pick_step(values, step_mask):
        # Keeps the entries selected by the [batch, time] mask; when the mask
        # has no True entry at all, falls back to time step 0.
        return tf.cond(
            tf.keras.backend.any(step_mask),
            lambda: tf.boolean_mask(values, step_mask),
            lambda: values[:, 0])

    def _tile_flat(scalar):
        # Repeats a scalar parameter into a flat [time * batch] tensor.
        as_matrix = tf.reshape(tf.convert_to_tensor(scalar), [1, 1])
        return tf.reshape(tf.tile(as_matrix, time_batch), [-1])

    # [time * batch] -> [batch, time].
    path_ids = tf.transpose(
        tf.reshape(neck_outputs[constants.PATH_ID], time_batch))

    # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
    flat_text = neck_outputs['text_feature']
    # Shape = [batch_size, time, num_tokens, hidden_dim]
    raw_text_feature = tf.transpose(
        tf.reshape(flat_text, time_batch + flat_text.shape[1:].as_list()),
        perm=[1, 0, 2, 3])

    # <tf.float32>[time * batch_size, 1, hidden_dim]
    flat_visual = neck_outputs['visual_feature']
    # <tf.float32>[time, batch_size, 1, hidden_dim]
    visual_feature = tf.reshape(
        flat_visual, time_batch + flat_visual.shape[1:].as_list())
    # <tf.float32>[batch_size, time, hidden_dim]
    visual_feature = tf.transpose(
        tf.squeeze(visual_feature, axis=2), [1, 0, 2])

    # Shape = [time, batch].
    disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_batch)
    # Both step masks are transposed to [batch, time].
    first_true = tf.transpose(utils.get_first_true_column(disc_mask))
    last_true = tf.transpose(utils.get_last_true_column(disc_mask))

    # Sanity check: path ids agree between the first and the last step.
    tf.debugging.assert_equal(
        _pick_step(path_ids, first_true), _pick_step(path_ids, last_true))

    # <tf.float32>[batch_size, num_tokens, hidden_dim]
    text_feature = _pick_step(raw_text_feature, first_true)
    # Sanity check: text features agree between the first and the last step.
    tf.debugging.assert_equal(
        text_feature, _pick_step(raw_text_feature, last_true))

    # <tf.float32>[batch_size, time, num_tokens]
    alpha_i_j = tf.matmul(visual_feature, text_feature, transpose_b=True)
    # Attention over tokens. <tf.float32>[batch, time, num_tokens]
    c_i_j = tf.nn.softmax(alpha_i_j)
    # <tf.float32>[batch, time]
    mask = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)
    # <tf.float32>[batch, time]
    score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

    # softmin(x) = softmax(-x), computed in the stable form that relies on
    # softmax(x) = softmax(x + c); the constant used here is c = max(-x).
    negated = -1.0 * score
    escore = tf.exp(negated - tf.reduce_max(negated)) * mask
    # Per-path normalizer, broadcast over the time axis.
    # NOTE(review): a row whose mask is all zeros gives 0 / 0 here.
    score_weight = tf.divide(
        escore, tf.reduce_sum(escore, 1, keepdims=True))

    # One similarity per path, repeated for every time step and flattened.
    # shape: [time * batch_size]
    per_path = tf.expand_dims(
        tf.reduce_sum(mask * score * score_weight, 1), axis=0)
    similarities = tf.reshape(tf.tile(per_path, [num_timesteps, 1]), [-1])

    # Apply an affine transform.
    similarities = similarities * self.affine_a + self.affine_b

    # shape: [time * batch]
    output_a = _tile_flat(self.affine_a)
    output_b = _tile_flat(self.affine_b)

    return common.AgentOutput(
        policy_logits=similarities, baseline=(output_a, output_b))
def _head(self, neck_outputs):
    """Scores each path by attention-weighted visual/text alignment.

    Args:
      neck_outputs: Dict with `constants.DISC_MASK`, 'visual_feature' and
        'text_feature' entries, all flattened over [time * batch] in their
        leading dimension.

    Returns:
      A `common.AgentOutput` whose `policy_logits` are the affine-transformed
      similarities of shape [time, batch], and whose `baseline` is the pair of
      affine parameters `(a, b)`, each tiled to [time, batch].
    """
    num_timesteps = self._current_num_timesteps
    batch_size = self._current_batch_size
    time_batch = [num_timesteps, batch_size]

    # <tf.float32>[time * batch_size, 1, hidden_dim]
    visual_feature = neck_outputs['visual_feature']
    # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
    text_feature = neck_outputs['text_feature']

    # <tf.float32>[time, batch_size, 1, hidden_dim]
    visual_feature = tf.reshape(
        visual_feature, time_batch + visual_feature.shape[1:].as_list())
    # <tf.float32>[batch_size, time, hidden_dim]
    visual_feature = tf.squeeze(visual_feature, axis=2)
    visual_feature = tf.transpose(visual_feature, [1, 0, 2])

    # Shape = [time, batch].
    disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_batch)
    # Transposed to [batch, time]: boolean_mask returns entries in row-major
    # order of the mask, so masking a time-major layout would emit the text
    # features ordered by time step instead of by batch index whenever paths
    # start at different steps, misaligning them with `visual_feature`.
    first_true = tf.transpose(utils.get_first_true_column(disc_mask))

    # <tf.float32>[batch_size, time, num_tokens, hidden_dim]
    raw_text_feature = tf.transpose(
        tf.reshape(text_feature,
                   time_batch + text_feature.shape[1:].as_list()),
        perm=[1, 0, 2, 3])
    # <tf.float32>[batch_size, num_tokens, hidden_dim]
    # When the mask has no True entry, fall back to time step 0.
    text_feature = tf.cond(
        tf.keras.backend.any(first_true),
        lambda: tf.boolean_mask(raw_text_feature, first_true),
        lambda: raw_text_feature[:, 0, :, :])

    # <tf.float32>[batch_size, time, num_tokens]
    alpha_i_j = tf.matmul(visual_feature,
                          tf.transpose(text_feature, perm=[0, 2, 1]))
    # Attention over tokens. tf.nn.softmax is the overflow-safe equivalent of
    # exp(alpha) / sum(exp(alpha)). <tf.float32>[batch, time, num_tokens]
    c_i_j = tf.nn.softmax(alpha_i_j, axis=-1)

    # <tf.float32>[batch, time]
    mask = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)
    # <tf.float32>[batch, time]
    score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

    # softmin(x) = softmax(-x). Shifting the exponent by a constant leaves
    # the normalized weights unchanged and keeps exp() from overflowing.
    negative_score = -1.0 * score
    escore = tf.exp(negative_score - tf.reduce_max(negative_score)) * mask
    # Per-path normalizer, broadcast over the time axis.
    sum_escore = tf.reduce_sum(escore, 1, keepdims=True)
    score_weight = tf.divide(escore, sum_escore)

    # One similarity per path, repeated for every time step.
    similarities = tf.reduce_sum(mask * score * score_weight, 1)
    similarities = tf.expand_dims(similarities, axis=0)
    # [time_step, batch_size]
    similarities = tf.tile(similarities, [num_timesteps, 1])

    # Apply an affine transform.
    similarities = similarities * self.affine_a + self.affine_b

    # Expose the affine parameters with the same [time, batch] layout.
    output_a = tf.reshape(tf.convert_to_tensor(self.affine_a), [1, 1])
    output_b = tf.reshape(tf.convert_to_tensor(self.affine_b), [1, 1])
    output_a = tf.tile(output_a, time_batch)
    output_b = tf.tile(output_b, time_batch)

    return common.AgentOutput(
        policy_logits=similarities, baseline=(output_a, output_b))