Пример #1
0
def _get_discriminator_logits(learner_agent_output, env_output,
                              actor_agent_output, actor_action,
                              reward_clipping, discounting, baseline_cost,
                              entropy_cost, num_steps):
    """Returns the masked labels and discriminator logits, logging summaries.

    Both the logits and the labels are filtered with the mask returned by
    `utils.get_first_true_column` applied to the `'disc_mask'` observation.
    Histograms of the logits split by label value, the positive-label ratio
    and the means of the two affine parameters are written with
    `tf.summary`.

    Args:
      learner_agent_output: Object exposing `policy_logits` and a two-element
        `baseline` (affine `a`, affine `b`).
      env_output: Object whose `observation` mapping provides `'disc_mask'`
        and `'label'`.
      actor_agent_output: Unused.
      actor_action: Unused.
      reward_clipping: Unused.
      discounting: Unused.
      baseline_cost: Unused.
      entropy_cost: Unused.
      num_steps: Step value passed to every `tf.summary` call.

    Returns:
      A `(labels, logits)` tuple, both restricted to the first-true entries.
    """
    # Part of a shared signature; these arguments play no role here.
    del (actor_agent_output, actor_action, reward_clipping, discounting,
         baseline_cost, entropy_cost)

    observation = env_output.observation
    keep = utils.get_first_true_column(observation['disc_mask'])

    # Original notes: logits are [time, batch] before masking, [batch] after.
    logits = tf.boolean_mask(learner_agent_output.policy_logits, keep)
    affine_a, affine_b = learner_agent_output.baseline

    all_labels = tf.cast(observation['label'], tf.float32)
    tf.summary.scalar('labels/mean_labels before masking',
                      tf.reduce_mean(all_labels),
                      step=num_steps)
    # Original note: labels are [batch] after masking.
    labels = tf.boolean_mask(all_labels, keep)

    is_positive = tf.equal(labels, tf.constant(1.0))
    is_negative = tf.equal(labels, tf.constant(0.0))
    histogram_specs = (
        ('distribution/sigmoid_positive_logits',
         'distribution/positive_logits', is_positive),
        ('distribution/sigmoid_negative_logits',
         'distribution/negative_logits', is_negative),
    )
    for sigmoid_tag, raw_tag, selector in histogram_specs:
        chosen = tf.boolean_mask(logits, selector)
        tf.summary.histogram(sigmoid_tag, tf.sigmoid(chosen), step=num_steps)
        tf.summary.histogram(raw_tag, chosen, step=num_steps)

    positive_fraction = tf.reduce_mean(tf.cast(is_positive, tf.float32))
    tf.summary.scalar('labels/positive_label_ratio',
                      positive_fraction,
                      step=num_steps)
    tf.summary.scalar('affine_transform/a',
                      tf.reduce_mean(affine_a),
                      step=num_steps)
    tf.summary.scalar('affine_transform/b',
                      tf.reduce_mean(affine_b),
                      step=num_steps)
    return labels, logits
Пример #2
0
def get_discriminator_loss(learner_agent_output, env_output,
                           actor_agent_output, actor_action, reward_clipping,
                           discounting, baseline_cost, entropy_cost,
                           num_steps, pos_weight=5):
    """Computes the weighted cross-entropy discriminator loss.

    The logits (after squeezing axis 1) and the labels are filtered with the
    mask returned by `utils.get_first_true_column` applied to the
    `'disc_mask'` observation. Summary histograms and scalars describing the
    logits, labels and affine parameters are written along the way.

    Args:
      learner_agent_output: Object exposing `policy_logits` and a two-element
        `baseline` (affine `a`, affine `b`).
      env_output: Object whose `observation` mapping provides `'disc_mask'`
        and `'label'`.
      actor_agent_output: Unused.
      actor_action: Unused.
      reward_clipping: Unused.
      discounting: Unused.
      baseline_cost: Unused.
      entropy_cost: Unused.
      num_steps: Step value passed to every `tf.summary` call.
      pos_weight: Coefficient applied to positive examples by
        `tf.nn.weighted_cross_entropy_with_logits`. Defaults to 5, the value
        that used to be hard-coded.

    Returns:
      The element-wise (unreduced) weighted cross entropy for the selected
      entries.
    """
    # Part of a shared signature; these arguments play no role here.
    del actor_agent_output
    del actor_action
    del reward_clipping
    del discounting
    del baseline_cost
    del entropy_cost

    first_true = utils.get_first_true_column(
        env_output.observation['disc_mask'])
    output_logits = learner_agent_output.policy_logits
    output_logits = tf.squeeze(output_logits, axis=1)
    output_logits = tf.boolean_mask(output_logits, first_true)
    output_affine_a, output_affine_b = learner_agent_output.baseline

    # Keep only the labels at the first-true positions.
    labels = tf.cast(env_output.observation['label'], tf.float32)
    labels = tf.boolean_mask(labels, first_true)

    positive_label = tf.equal(labels, tf.constant(1.0))
    positive_logits = tf.boolean_mask(output_logits, positive_label)
    tf.summary.histogram('distribution/sigmoid_positive_logits',
                         tf.sigmoid(positive_logits),
                         step=num_steps)
    tf.summary.histogram('distribution/positive_logits',
                         positive_logits,
                         step=num_steps)

    negative_label = tf.equal(labels, tf.constant(0.0))
    negative_logits = tf.boolean_mask(output_logits, negative_label)
    tf.summary.histogram('distribution/sigmoid_negative_logits',
                         tf.sigmoid(negative_logits),
                         step=num_steps)
    tf.summary.histogram('distribution/negative_logits',
                         negative_logits,
                         step=num_steps)

    tf.summary.scalar('labels/positive_label',
                      tf.reduce_mean(tf.cast(positive_label, tf.float32)),
                      step=num_steps)

    tf.summary.scalar('labels/labels', tf.reduce_mean(labels), step=num_steps)
    tf.summary.scalar('affine_transform/a',
                      tf.reduce_mean(output_affine_a),
                      step=num_steps)
    tf.summary.scalar('affine_transform/b',
                      tf.reduce_mean(output_affine_b),
                      step=num_steps)

    # Positive examples are up-weighted by `pos_weight` relative to negatives.
    cross_entropy = tf.nn.weighted_cross_entropy_with_logits(
        labels=labels, logits=output_logits, pos_weight=pos_weight)
    return cross_entropy
Пример #3
0
    def _head(self, env_output, neck_outputs):
        """Builds similarity logits, a similarity mask and classification logits.

        Text states are taken at the step flagged by
        `utils.get_first_true_column(disc_mask)`. Image states are taken either
        at the step flagged by `utils.get_last_true_column(disc_mask)` or, when
        `self._average_image_states_of_all_steps` is set, as a mean over time of
        the masked states. Both are projected, L2-normalized and multiplied into
        a pairwise similarity matrix.

        Shapes quoted in the comments below come from the original annotations
        and are not enforced here except where an assertion says so.

        Args:
          env_output: Object whose `observation` mapping provides
            `constants.PATH_ID` and `'label'`.
          neck_outputs: Mapping providing `constants.DISC_MASK`, `'text_state'`
            and `'visual_state'`.

        Returns:
          A `common.AgentOutput` whose `policy_logits` is a dict with keys
          `'similarity'` (scaled similarity), `'similarity_mask'` and
          `'labels'`, and whose `baseline` holds the affine-transformed diagonal
          of the unscaled similarity.
        """
        disc_mask = tf.reshape(
            neck_outputs[constants.DISC_MASK],
            [self._current_num_timesteps, self._current_batch_size])
        # Get first_true time step for text states as it's the same for all steps
        # in a path.
        # Shape = [time, batch] for both disc_mask and first_true
        first_true = utils.get_first_true_column(disc_mask)
        # Transpose to [batch, time] to ensure correct batch order for boolean_mask.
        first_true = tf.transpose(first_true, perm=[1, 0])

        # Transpose a list of n_lstm_layers (h, c) states to batch major.
        raw_text_state = tf.nest.map_structure(
            lambda t: tf.transpose(t, perm=[1, 0, 2]),
            neck_outputs['text_state'])
        # NOTE(review): the hidden size 512 is hard-coded in this check, so it
        # only passes when the first text state has exactly that width.
        tf.debugging.assert_equal(
            raw_text_state[0][0].shape,
            [self._current_batch_size, self._current_num_timesteps, 512])
        # Take the first step's text state since it's the same for all steps.
        # Selected state has shape [batch, hidden]
        text_state = self._select_by_mask(raw_text_state, first_true)

        # Projected shape: [batch, hidden_dim].
        text_feature = self._get_final_projection(
            self._instruction_feature_projection, text_state)

        # Get last_true mask for image states, i.e., state at end of sequence.
        # Shape = [time, batch] for both disc_mask and last_true
        last_true = utils.get_last_true_column(disc_mask)
        # Transposed to [batch, time], matching first_true above.
        last_true = tf.transpose(last_true, perm=[1, 0])
        # Sanity check: ensure the first and last text states in a path are same.
        # Only the first element of the last entry of each selection is compared.
        text_state_last_true = self._select_by_mask(raw_text_state, last_true)
        tf.debugging.assert_equal(text_state[-1][0],
                                  text_state_last_true[-1][0])

        # Transpose image states, a list of (h, c) states, into batch major. Each
        # state has shape [batch, time_step, hidden_dim]
        raw_image_state = tf.nest.map_structure(
            lambda t: tf.transpose(t, perm=[1, 0, 2]),
            neck_outputs['visual_state'])
        if self._average_image_states_of_all_steps:
            # Shape = [batch, time_step, 1]
            float_disc_mask = tf.expand_dims(tf.cast(tf.transpose(disc_mask),
                                                     tf.float32),
                                             axis=2)
            # Shape of each reduced state: [batch, hidden_dim]
            # NOTE(review): reduce_mean divides by the full time dimension, so
            # masked-out steps contribute zeros to the average instead of being
            # excluded from it -- confirm this is intended.
            image_state = tf.nest.map_structure(
                lambda x: tf.reduce_mean(x * float_disc_mask, 1),
                raw_image_state)
        else:
            # Selected state has shape [batch, hidden_dim].
            image_state = self._select_by_mask(raw_image_state, last_true)
        # Projected shape: [batch, hidden].
        visual_feature = self._get_final_projection(
            self._image_feature_projection, image_state)

        # Normalize features to unit L2 norm along the last axis.
        visual_feature = tf.nn.l2_normalize(visual_feature, axis=-1)
        text_feature = tf.nn.l2_normalize(text_feature, axis=-1)

        # Select path_ids for current batch.
        # Transposed shape = [batch, time].
        raw_path_ids = tf.transpose(env_output.observation[constants.PATH_ID])
        # Shape = [batch].
        path_ids = self._select_by_mask(raw_path_ids, first_true)
        # Asserts first true and last true are referring to the same path.
        path_ids_last_true = self._select_by_mask(raw_path_ids, last_true)
        tf.debugging.assert_equal(path_ids, path_ids_last_true)

        # Shape = [time, batch]
        raw_labels = tf.cast(env_output.observation['label'], tf.float32)
        # Transposed shape = [batch, time].
        raw_labels = tf.transpose(raw_labels)
        # Shape = [batch]
        labels = self._select_by_mask(raw_labels, first_true)
        # The label must be the same at the first and last flagged step.
        tf.debugging.assert_equal(labels,
                                  self._select_by_mask(raw_labels, last_true))
        # Add time dimension as required by actor. Shape = [1, batch]
        labels = tf.expand_dims(labels, axis=0)

        # Pairwise products of visual (rows) and text (columns) features.
        # Shape: [batch, batch]
        similarity = tf.matmul(visual_feature,
                               tf.transpose(text_feature, perm=[1, 0]))
        # Add time dim as required by actor. Shape = [1, batch, batch]
        similarity = tf.expand_dims(similarity, axis=0)

        # Make similarity mask to exclude multiple positive matching labels
        diag_mask = tf.eye(self._current_batch_size, dtype=tf.bool)
        # path_id mask where matching col-row pairs are 1 except diagonal pairs.
        rows = tf.tile(tf.reshape(path_ids, [self._current_batch_size, 1]),
                       [1, self._current_batch_size])
        cols = tf.tile(tf.reshape(path_ids, [1, self._current_batch_size]),
                       [self._current_batch_size, 1])
        path_id_mask = tf.logical_and(tf.equal(rows, cols),
                                      tf.logical_not(diag_mask))
        # Filter the mask by label. Positive labels are 1.
        # label_mask[i, j] is True only when both labels i and j are non-zero.
        row_labels = tf.tile(tf.reshape(labels, [self._current_batch_size, 1]),
                             [1, self._current_batch_size])
        col_labels = tf.tile(tf.reshape(labels, [1, self._current_batch_size]),
                             [self._current_batch_size, 1])
        label_mask = tf.logical_and(tf.cast(row_labels, tf.bool),
                                    tf.cast(col_labels, tf.bool))

        # M[i, j]=0 (i!=j) if path_id_mask[i,j] is True and label_mask[i, j] is True
        similarity_mask = tf.logical_not(
            tf.logical_and(path_id_mask, label_mask))
        # Add timestep dim as required by actor. Shape = [1, batch, batch]
        similarity_mask = tf.expand_dims(similarity_mask, axis=0)

        # Computes logits by transforming similarity from [-1, 1] to unbound.
        # Shape: [1, batch, batch] (the leading dim was added above).
        similarity_logits = self.similarity_scaler * similarity

        output_logits = {
            'similarity': similarity_logits,
            'similarity_mask': similarity_mask,
            'labels': labels
        }

        # Logits for classification loss, built from the diagonal of the
        # unscaled `similarity` (not `similarity_logits`). Because `similarity`
        # carries the leading dim of size 1, the result has shape [1, batch].
        classification_logits = (
            self.affine_a * tf.linalg.diag_part(similarity) + self.affine_b)

        return common.AgentOutput(policy_logits=output_logits,
                                  baseline=classification_logits)
Пример #4
0
    def _head(self, neck_outputs):
        """Scores every path against its instruction via soft attention.

        Per-step visual features attend over the instruction tokens. The
        attention-weighted alignment of each step is then pooled over time
        with a masked softmin, and the pooled score goes through the affine
        transform `affine_a * x + affine_b`.

        Shape annotations in the comments follow the original notes.

        Args:
          neck_outputs: Mapping providing `constants.PATH_ID`,
            `constants.DISC_MASK`, `'visual_feature'` and `'text_feature'`,
            each flattened over [time * batch] in the leading dimension.

        Returns:
          A `common.AgentOutput` whose `policy_logits` is the transformed
          score repeated for every time step and flattened to [time * batch],
          and whose `baseline` is the pair of affine parameters tiled to the
          same flat shape.
        """
        num_steps = self._current_num_timesteps
        time_batch = [num_steps, self._current_batch_size]

        def unflatten(flat):
            # [time * batch, ...] -> [time, batch, ...]
            return tf.reshape(flat, time_batch + flat.shape[1:].as_list())

        def select_step(batch_major, selector):
            # Rows picked by `selector`; step 0 when `selector` is all False.
            return tf.cond(
                tf.keras.backend.any(selector),
                lambda: tf.boolean_mask(batch_major, selector),
                lambda: batch_major[:, 0])

        def tile_scalar(value):
            # Repeats a scalar into a flat [time * batch] tensor.
            as_matrix = tf.reshape(tf.convert_to_tensor(value), [1, 1])
            return tf.reshape(tf.tile(as_matrix, time_batch), [-1])

        # [batch, time]
        path_ids = tf.transpose(
            tf.reshape(neck_outputs[constants.PATH_ID], time_batch))

        # [batch, time, num_tokens, hidden_dim]
        tokens_by_path = tf.transpose(unflatten(neck_outputs['text_feature']),
                                      perm=[1, 0, 2, 3])

        # [time, batch, 1, hidden_dim] -> [batch, time, hidden_dim]
        visual = unflatten(neck_outputs['visual_feature'])
        visual = tf.transpose(tf.squeeze(visual, axis=2), [1, 0, 2])

        # [time, batch] mask and its batch-major first/last selectors.
        disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_batch)
        first_step = tf.transpose(utils.get_first_true_column(disc_mask))
        last_step = tf.transpose(utils.get_last_true_column(disc_mask))

        # Sanity check: first and last selected steps refer to the same path.
        tf.debugging.assert_equal(select_step(path_ids, first_step),
                                  select_step(path_ids, last_step))

        # [batch, num_tokens, hidden_dim]; must agree at first and last step.
        instruction = select_step(tokens_by_path, first_step)
        tf.debugging.assert_equal(instruction,
                                  select_step(tokens_by_path, last_step))

        # [batch, time, num_tokens]: step/token alignment and its softmax.
        alignment = tf.matmul(visual, instruction, transpose_b=True)
        attention = tf.nn.softmax(alignment)

        # [batch, time]
        valid = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)
        step_score = tf.reduce_sum(attention * alignment, 2)

        # Masked softmin over time: softmin(x) = softmax(-x). Subtracting the
        # maximum keeps exp() from overflowing without changing the result.
        negated = -1.0 * step_score
        unnormalized = tf.exp(negated - tf.reduce_max(negated)) * valid
        normalizer = tf.reduce_sum(unnormalized, 1, keepdims=True)
        step_weight = unnormalized / normalizer

        # [batch] -> repeated per time step -> [time * batch]
        pooled = tf.reduce_sum(valid * step_score * step_weight, 1)
        repeated = tf.tile(pooled[tf.newaxis, :], [num_steps, 1])
        similarities = tf.reshape(repeated, [-1])

        # Apply an affine transform.
        similarities = similarities * self.affine_a + self.affine_b

        return common.AgentOutput(
            policy_logits=similarities,
            baseline=(tile_scalar(self.affine_a), tile_scalar(self.affine_b)))
Пример #5
0
  def _head(self, neck_outputs):
    """Scores every path against its instruction via soft attention.

    Per-step visual features attend over the instruction tokens. The
    attention-weighted alignment of each step is then pooled over time with
    a masked softmin, and the pooled score goes through the affine transform
    `affine_a * x + affine_b`.

    Two defects of the earlier implementation are fixed here:
      * The instruction features were selected with `tf.boolean_mask` on the
        time-major flattened tensor. Whenever the selected step differed
        between batch entries, the rows came out in (time, batch) order and
        no longer lined up with the batch-major visual features. Selection
        now happens in batch-major layout.
      * The softmax and softmin were spelled out with raw `tf.exp`, which
        overflows for large magnitudes. They now use `tf.nn.softmax` and a
        max-shifted exponent, which are mathematically equivalent.

    Shape annotations in the comments follow the original notes.

    Args:
      neck_outputs: Mapping providing `constants.DISC_MASK`,
        `'visual_feature'` and `'text_feature'`, each flattened over
        [time * batch] in the leading dimension.

    Returns:
      A `common.AgentOutput` whose `policy_logits` is the transformed score
      tiled to [time, batch], and whose `baseline` is the pair of affine
      parameters tiled to the same shape.
    """
    time_batch = [self._current_num_timesteps, self._current_batch_size]

    # <tf.float32>[time * batch_size, 1, hidden_dim]
    visual_feature = neck_outputs['visual_feature']
    # <tf.float32>[time * batch_size, num_tokens, hidden_dim]
    text_feature = neck_outputs['text_feature']

    # <tf.float32>[time, batch_size, 1, hidden_dim]
    visual_feature = tf.reshape(
        visual_feature, time_batch + visual_feature.shape[1:].as_list())

    # <tf.float32>[batch_size, time, hidden_dim]
    visual_feature = tf.squeeze(visual_feature, axis=2)
    visual_feature = tf.transpose(visual_feature, [1, 0, 2])

    # <tf.float32>[batch_size, time, num_tokens, hidden_dim]
    raw_text_feature = tf.transpose(
        tf.reshape(text_feature,
                   time_batch + text_feature.shape[1:].as_list()),
        perm=[1, 0, 2, 3])

    # [time, batch_size]
    disc_mask = tf.reshape(neck_outputs[constants.DISC_MASK], time_batch)
    # Batch-major [batch_size, time] so boolean_mask keeps the batch order.
    first_true = tf.transpose(utils.get_first_true_column(disc_mask))

    # <tf.float32>[batch_size, num_tokens, hidden_dim]
    # Falls back to the first time step when nothing is flagged at all.
    text_feature = tf.cond(
        tf.keras.backend.any(first_true),
        lambda: tf.boolean_mask(raw_text_feature, first_true),
        lambda: raw_text_feature[:, 0, :, :])
    # visual_feature = tf.nn.l2_normalize(visual_feature, axis=2)
    # text_feature = tf.nn.l2_normalize(text_feature, axis=2)

    # <tf.float32>[batch_size, time, num_tokens]
    alpha_i_j = tf.matmul(visual_feature,
                          tf.transpose(text_feature, perm=[0, 2, 1]))
    # <tf.float32>[batch, time, num_tokens]; softmax over the token axis.
    c_i_j = tf.nn.softmax(alpha_i_j)
    # <tf.float32>[batch_size, time]
    mask = tf.cast(tf.transpose(disc_mask, perm=[1, 0]), tf.float32)
    # <tf.float32>[batch, time]
    score = tf.reduce_sum(c_i_j * alpha_i_j, 2)

    # Masked softmin over time: softmin(x) = softmax(-x). Subtracting the
    # per-row maximum leaves the normalized weights unchanged while keeping
    # exp() from overflowing.
    negative_score = -1.0 * score
    shift = tf.reduce_max(negative_score, axis=1, keepdims=True)
    escore = tf.exp(negative_score - shift) * mask
    sum_escore = tf.tile(
        tf.expand_dims(tf.reduce_sum(escore, 1), 1), [1, tf.shape(escore)[1]])
    score_weight = tf.divide(escore, sum_escore)
    similarities = tf.reduce_sum(mask * score * score_weight, 1)
    similarities = tf.expand_dims(similarities, axis=0)
    # [time_step, batch_size]
    similarities = tf.tile(similarities, [self._current_num_timesteps, 1])

    # Apply an affine transform.
    similarities = similarities * self.affine_a + self.affine_b

    output_a = tf.reshape(tf.convert_to_tensor(self.affine_a), [1, 1])
    output_b = tf.reshape(tf.convert_to_tensor(self.affine_b), [1, 1])

    # [time_step, batch_size]
    output_a = tf.tile(output_a, time_batch)
    output_b = tf.tile(output_b, time_batch)

    return common.AgentOutput(
        policy_logits=similarities, baseline=(output_a, output_b))