Example #1
def _get_relative_position_embeddings(
    full_position_embeddings,
    token_type_ids,
    token_type_vocab_size,
    seq_length,
    batch_size,
    max_position_embeddings,
):
  """Create position embeddings that restart at every cell."""
  col_index = segmented_tensor.IndexMap(
      token_type_ids[1], token_type_vocab_size[1], batch_dims=1)
  row_index = segmented_tensor.IndexMap(
      token_type_ids[2], token_type_vocab_size[2], batch_dims=1)
  # Cells are the product of the column and row segmentations.
  full_index = segmented_tensor.ProductIndexMap(col_index, row_index)
  position = tf.expand_dims(tf.range(seq_length), axis=0)
  logging.info("position: %s", position)
  batched_position = tf.repeat(position, repeats=batch_size, axis=0)
  logging.info("batched_position: %s", batched_position)
  logging.info("token_type_ids: %s", token_type_ids[1])
  # The first (smallest) absolute position within each cell.
  first_position_per_segment = segmented_tensor.reduce_min(
      batched_position, full_index)[0]
  # Broadcast that first position back to every token of the cell.
  first_position = segmented_tensor.gather(first_position_per_segment,
                                           full_index)
  # Look up the embedding for the cell-relative position, clamped so it
  # never indexes past the embedding table.
  position_embeddings = tf.nn.embedding_lookup(
      full_position_embeddings,
      tf.math.minimum(max_position_embeddings - 1, position - first_position))
  return position_embeddings
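
Stripped of TensorFlow, the computation above shifts every token's position so that counting restarts at the first token of its cell. A minimal NumPy sketch of that idea (relative_positions is an illustrative helper, not part of the TAPAS API):

import numpy as np

def relative_positions(cell_ids):
  """For each token: its position minus the first position of its cell."""
  first = {}  # cell id -> first (minimum) position seen
  for pos, cell in enumerate(cell_ids):
    first.setdefault(cell, pos)
  return np.array([pos - first[cell] for pos, cell in enumerate(cell_ids)])

# Tokens 0-1 sit in cell 7, tokens 2-4 in cell 9: positions restart per cell.
print(relative_positions([7, 7, 9, 9, 9]))  # -> [0 1 0 1 2]
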
Example #2
def get_token_scores_from_column_scores(
    column_ids,
    column_probs,
    input_mask,
    max_num_columns,
):
    """Given the columns scores in [0,1] extracts the tokens scores.

  It also gives a score of 1.0 for the question's tokens and padding.

  Args:
    column_ids: <int32>[batch_size, seq_length] additional to the columns' ids
      [1, max_num_columns] the value 0 refers to question tokens and padding.
    column_probs: <float32>[batch_size, max_column_id]: contains only the
      columns' scores: question score or padding not included. The expected
        values are in [0,1].
    input_mask: <float32>[batch_size, seq_length] used to zero-out the padding.
    max_num_columns: the maximum number of columns.

  Returns:
    <float32>[batch_size, seq_length]: The tokens' scores.
  """
    col_index = segmented_tensor.IndexMap(indices=column_ids,
                                          num_segments=max_num_columns + 1,
                                          batch_dims=1)
    # <float32>[batch_size, max_num_columns + 1]: contains the question at
    # position 0. The scores for the question and padding are 1.
    padded_column_scores = tf.pad(column_probs,
                                  paddings=[[0, 0], [1, 0]],
                                  constant_values=1.0)
    # <float32>[batch_size, seq_length]
    return segmented_tensor.gather(index=col_index,
                                   values=padded_column_scores) * tf.cast(
                                       input_mask, dtype=tf.float32)
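
The tf.pad trick above is what gives question tokens and padding a score of 1.0: a constant column of ones is prepended at index 0, and gathering with column_ids (where 0 marks question/padding) picks it up. A NumPy sketch of the same mechanics, with made-up scores:

import numpy as np

# A toy batch with max_num_columns = 2: scores for columns 1 and 2.
column_probs = np.array([[0.3, 0.8]])
# Prepend 1.0 at index 0, mirroring tf.pad(..., constant_values=1.0).
padded = np.pad(column_probs, ((0, 0), (1, 0)), constant_values=1.0)
# column_ids: 0 = question/padding, 1..max_num_columns = table columns.
column_ids = np.array([[0, 0, 1, 1, 2, 0]])
print(np.take_along_axis(padded, column_ids, axis=1))
# -> [[1.  1.  0.3 0.3 0.8 1. ]]
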
Example #3
  def test_gather_vectorized(self):
    values = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
    index = segmented_tensor.IndexMap(
        indices=[[0, 1], [1, 0]], num_segments=2, batch_dims=1)
    result = segmented_tensor.gather(values, index)
    with self.session() as sess:
      self.assertAllEqual(
          sess.run(result), [[[1, 2], [3, 4]], [[7, 8], [5, 6]]])
Example #4
  def test_gather(self):
    values, row_index, col_index = self._prepare_tables()
    cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

    # Compute sums and then gather. The result should have the same shape as
    # the original table and each element should contain the sum the values in
    # its cell.
    sums, _ = segmented_tensor.reduce_sum(values, cell_index)
    cell_sum = segmented_tensor.gather(sums, cell_index)
    cell_sum.shape.assert_is_compatible_with(values.shape)

    with self.session() as sess:
      self.assertAllClose(
          sess.run(cell_sum),
          [[[3.0, 3.0, 3.0], [2.0, 2.0, 1.0], [4.0, 4.0, 4.0]],
           [[1.0, 2.0, 3.0], [2.0, 0.0, 1.0], [1.0, 3.0, 4.0]]])
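
Both tests exercise the same reduce-then-gather pattern: compute a statistic per segment, then broadcast it back so every element carries its segment's value. A dependency-free sketch of that pattern (segment_sum_then_gather is illustrative, not the library function):

import numpy as np

def segment_sum_then_gather(values, segment_ids, num_segments):
  """Sum values per segment, then broadcast each sum back to its members."""
  sums = np.zeros(num_segments)
  np.add.at(sums, segment_ids, values)  # reduce: per-segment sums
  return sums[segment_ids]              # gather: one value per element

values = np.array([1.0, 2.0, 3.0, 4.0])
print(segment_sum_then_gather(values, np.array([0, 0, 1, 1]), 2))
# -> [3. 3. 7. 7.]
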
Example #5
    def call(self, input_token_ids, input_mask, segment_ids, column_ids,
             row_ids, prev_label_ids, column_ranks, inv_column_ranks,
             numeric_relations, label_ids, **kwargs):

        # Construct indices for the table.
        row_index = segmented_tensor.IndexMap(
            indices=tf.minimum(tf.cast(row_ids, tf.int32),
                               self.tapas_classifier_config.max_num_rows - 1),
            num_segments=self.tapas_classifier_config.max_num_rows,
            batch_dims=1)
        col_index = segmented_tensor.IndexMap(
            indices=tf.minimum(
                tf.cast(column_ids, tf.int32),
                self.tapas_classifier_config.max_num_columns - 1),
            num_segments=self.tapas_classifier_config.max_num_columns,
            batch_dims=1)
        cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

        # Masks.
        # <float32>[batch_size, seq_length]
        table_mask = tf.where(row_ids > 0, tf.ones_like(row_ids),
                              tf.zeros_like(row_ids))
        input_mask_float = tf.cast(input_mask, tf.float32)
        table_mask_float = tf.cast(table_mask, tf.float32)

        # Mask for cells that exist in the table (i.e. that are not padding).
        cell_mask, _ = segmented_tensor.reduce_mean(input_mask_float,
                                                    cell_index)

        pooled_output, sequence_output = self.bert([
            input_token_ids, input_mask, segment_ids, column_ids, row_ids,
            prev_label_ids, column_ranks, inv_column_ranks, numeric_relations
        ], **kwargs)
        # Compute logits per token. These are used to select individual cells.
        logits = self.compute_token_logits(sequence_output)
        # Compute logits per column. These are used to select a column.
        if self.tapas_classifier_config.select_one_column:
            column_logits = self.compute_column_logits(sequence_output,
                                                       cell_index, cell_mask)

        logits_cls = None
        if self.do_model_classification:
            logits_cls = self.compute_classification_logits(pooled_output)

        if self.tapas_classifier_config.average_logits_per_cell:
            logits_per_cell, _ = segmented_tensor.reduce_mean(
                logits, cell_index)
            logits = segmented_tensor.gather(logits_per_cell, cell_index)
        dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        if self.tapas_classifier_config.select_one_column:
            logits = single_column_cell_selection(logits, column_logits,
                                                  label_ids, cell_index,
                                                  col_index, cell_mask)
            dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        logits_aggregation = None
        if self.do_model_aggregation:
            logits_aggregation = self.calculate_aggregation_logits(
                pooled_output)

        probs = _get_probs(dist_per_token) * input_mask_float

        return logits, probs, logits_aggregation, logits_cls
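
_get_probs is not shown in this excerpt, but for a Bernoulli distribution parameterized by logits the per-token probability amounts to a sigmoid; the multiplication by input_mask_float then zeroes out padding. A sketch of that equivalence (assuming _get_probs returns the distribution's probabilities):

import math

def sigmoid(x):
  return 1.0 / (1.0 + math.exp(-x))

# Bernoulli(logits=l) assigns probability sigmoid(l) to the positive class.
for logit in (-4.0, 0.0, 4.0):
  print(round(sigmoid(logit), 4))  # -> 0.018, 0.5, 0.982
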
Example #6
def single_column_cell_selection(token_logits, column_logits, label_ids,
                                 cell_index, col_index, cell_mask):
    """Computes the loss for cell selection constrained to a single column.

    The loss is a hierarchical log-likelihood. The model first predicts a column
    and then selects cells within that column (conditioned on the column). Cells
    outside the selected column are never selected.

    Args:
      token_logits: <float>[batch_size, seq_length] Logits per token.
      column_logits: <float>[batch_size, max_num_cols] Logits per column.
      label_ids: <int32>[batch_size, seq_length] Labels per token.
      cell_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
        groups tokens into cells.
      col_index: segmented_tensor.IndexMap [batch_size, seq_length] Index that
        groups tokens into columns.
      cell_mask: <float>[batch_size, max_num_rows * max_num_cols] Input mask per
        cell, 1 for cells that exists in the example and 0 for padding.

    Returns:
      selection_loss_per_example: <float>[batch_size] Loss for each example.
      logits: <float>[batch_size, seq_length] New logits which are only allowed
        to select cells in a single column. Logits outside of the most likely
        column according to `column_logits` will be set to a very low value
        (such that the probabilities are 0).
    """
    # First find the column we should select. We use the column with maximum
    # number of selected cells.
    labels_per_column, _ = segmented_tensor.reduce_sum(
        tf.cast(label_ids, tf.float32), col_index)
    column_label = tf.argmax(labels_per_column, axis=-1, output_type=tf.int32)
    # Check if there are no selected cells in the column. In that case the model
    # should predict the special column id 0, which means "select nothing".
    no_cell_selected = tf.equal(tf.reduce_max(labels_per_column, axis=-1), 0)
    column_label = tf.where(no_cell_selected, tf.zeros_like(column_label),
                            column_label)

    column_dist = tfp.distributions.Categorical(logits=column_logits)

    # Reduce the labels and logits to per-cell from per-token.
    logits_per_cell, _ = segmented_tensor.reduce_mean(token_logits, cell_index)
    _, labels_index = segmented_tensor.reduce_max(tf.cast(label_ids, tf.int32),
                                                  cell_index)

    # Get the column id for every cell.
    column_id_for_cells = cell_index.project_inner(labels_index).indices

    # Set the probs outside the selected column (selected by the *model*)
    # to 0. This ensures backwards compatibility with models that select
    # cells from multiple columns.
    selected_column_id = tf.argmax(column_logits,
                                   axis=-1,
                                   output_type=tf.int32)
    selected_column_mask = tf.cast(
        tf.equal(column_id_for_cells,
                 tf.expand_dims(selected_column_id, axis=-1)), tf.float32)
    # Never select cells with the special column id 0.
    selected_column_mask = tf.where(tf.equal(column_id_for_cells, 0),
                                    tf.zeros_like(selected_column_mask),
                                    selected_column_mask)
    logits_per_cell += CLOSE_ENOUGH_TO_LOG_ZERO * (
        1.0 - cell_mask * selected_column_mask)
    logits = segmented_tensor.gather(logits_per_cell, cell_index)

    return logits
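
The CLOSE_ENOUGH_TO_LOG_ZERO trick is worth isolating: adding a very large negative constant to a logit drives its sigmoid to 0, which is how cells outside the chosen column are made unselectable. A self-contained sketch (the constant's value here, -10000.0, is an assumption about the module-level definition):

import math

CLOSE_ENOUGH_TO_LOG_ZERO = -10000.0  # assumed value of the module constant

def stable_sigmoid(x):
  if x >= 0:
    return 1.0 / (1.0 + math.exp(-x))
  z = math.exp(x)  # underflows to 0.0 for very negative x
  return z / (1.0 + z)

def masked_logit(logit, keep):
  # keep == 1.0 leaves the logit intact; keep == 0.0 pushes it toward log(0).
  return logit + CLOSE_ENOUGH_TO_LOG_ZERO * (1.0 - keep)

print(stable_sigmoid(masked_logit(2.0, 1.0)))  # ~0.88: cell stays selectable
print(stable_sigmoid(masked_logit(2.0, 0.0)))  # 0.0: cell is masked out
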
Example #7
def _get_classification_outputs(
    config,
    is_training,
    output_layer,
    output_layer_aggregation,
    label_ids,
    input_mask,
    table_mask,
    aggregation_function_id,
    answer,
    numeric_values,
    numeric_values_scale,
    row_ids,
    column_ids,
    classification_class_index,
):
    """Creates a classification model.

  Args:
    config: Configuration for Tapas model.
    is_training: Whether the model is training.
    output_layer: <float32>[batch_size, seq_length, hidden_size]
    output_layer_aggregation: <float32>[batch_size, hidden_size]
    label_ids: <int32>[batch_size, seq_length]
    input_mask: <int32>[batch_size, seq_length]
    table_mask: <int32>[batch_size, seq_length]
    aggregation_function_id: <int32>[batch_size]
    answer: <float32>[batch_size]
    numeric_values: <float32>[batch_size, seq_length]
    numeric_values_scale: <float32>[batch_size, seq_length]
    row_ids: <int32>[batch_size, seq_length]
    column_ids: <int32>[batch_size, seq_length]
    classification_class_index: <int32>[batch]

  Returns:
    Outputs
  """
    if is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Construct indices for the table.
    row_index = segmented_tensor.IndexMap(
        indices=tf.minimum(row_ids, config.max_num_rows - 1),
        num_segments=config.max_num_rows,
        batch_dims=1)
    col_index = segmented_tensor.IndexMap(
        indices=tf.minimum(column_ids, config.max_num_columns - 1),
        num_segments=config.max_num_columns,
        batch_dims=1)
    cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

    # Masks.
    # <float32>[batch_size, seq_length]
    input_mask_float = tf.cast(input_mask, tf.float32)
    table_mask_float = tf.cast(table_mask, tf.float32)
    # Mask for cells that exist in the table (i.e. that are not padding).
    cell_mask, _ = segmented_tensor.reduce_mean(input_mask_float, cell_index)

    # Compute logits per token. These are used to select individual cells.
    logits = utils.compute_token_logits(
        output_layer=output_layer,
        temperature=config.temperature,
        init_cell_selection_weights_to_zero=(
            config.init_cell_selection_weights_to_zero))

    # Compute logits per column. These are used to select a column.
    if config.select_one_column:
        column_logits = utils.compute_column_logits(
            output_layer=output_layer,
            cell_index=cell_index,
            cell_mask=cell_mask,
            init_cell_selection_weights_to_zero=(
                config.init_cell_selection_weights_to_zero),
            allow_empty_column_selection=config.allow_empty_column_selection)

    # TODO(pawelnow): Extract this into a function.
    # Compute aggregation function logits.
    do_model_aggregation = config.num_aggregation_labels > 0
    if do_model_aggregation:
        hidden_size_agg = output_layer_aggregation.shape[-1].value
        output_weights_agg = tf.get_variable(
            "output_weights_agg",
            shape=[config.num_aggregation_labels, hidden_size_agg],
            initializer=_classification_initializer())
        output_bias_agg = tf.get_variable(
            "output_bias_agg",
            shape=[config.num_aggregation_labels],
            initializer=tf.zeros_initializer())

    do_model_classification = config.num_classification_labels > 0
    logits_cls = None
    if do_model_classification:
        logits_cls = compute_classification_logits(
            config.num_classification_labels, output_layer_aggregation)

    with tf.variable_scope("loss"):
        total_loss = 0.0
        is_supervised = (not do_model_aggregation
                         or not config.use_answer_as_supervision)

        ### Semi-supervised cell selection in case of no aggregation
        #############################################################

        # If the answer (the denotation) appears directly in the table we might
        # select the answer without applying any aggregation function. There are
        # some ambiguous cases, see _calculate_aggregate_mask for more info.
        # `aggregate_mask` is 1 for examples where we chose to aggregate and 0
        #  for examples where we chose to select the answer directly.
        # `label_ids` encodes the positions of the answer appearing in the table.
        if is_supervised:
            aggregate_mask = None
        else:
            # <float32>[batch_size]
            aggregate_mask = _calculate_aggregate_mask(
                answer=answer,
                output_layer_aggregation=output_layer_aggregation,
                output_bias_agg=output_bias_agg,
                output_weights_agg=output_weights_agg,
                cell_select_pref=config.cell_select_pref,
                label_ids=label_ids)

        ### Cell selection log-likelihood
        ###################################

        if config.average_logits_per_cell:
            logits_per_cell, _ = segmented_tensor.reduce_mean(
                logits, cell_index)
            logits = segmented_tensor.gather(logits_per_cell, cell_index)
        dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        selection_loss_per_example = None
        if config.select_one_column:
            selection_loss_per_example, logits = _single_column_cell_selection_loss(
                token_logits=logits,
                column_logits=column_logits,
                label_ids=label_ids,
                cell_index=cell_index,
                col_index=col_index,
                cell_mask=cell_mask)
            dist_per_token = tfp.distributions.Bernoulli(logits=logits)
        else:
            weight = tf.where(
                label_ids == 0, tf.ones_like(label_ids, dtype=tf.float32),
                config.positive_weight *
                tf.ones_like(label_ids, dtype=tf.float32))
            selection_loss_per_token = -dist_per_token.log_prob(
                label_ids) * weight
            selection_loss_per_example = (
                tf.reduce_sum(selection_loss_per_token * input_mask_float,
                              axis=1) /
                (tf.reduce_sum(input_mask_float, axis=1) +
                 _EPSILON_ZERO_DIVISION))

        ### Logits for the aggregation function
        #########################################

        logits_aggregation = None
        if do_model_aggregation:
            logits_aggregation = _calculate_aggregation_logits(
                output_layer_aggregation, output_weights_agg, output_bias_agg)

        ### Classification loss
        ###############################
        if do_model_classification:
            one_hot_labels = tf.one_hot(classification_class_index,
                                        depth=config.num_classification_labels,
                                        dtype=tf.float32)
            if config.classification_label_weight:
                label_weights = [
                    config.classification_label_weight.get(i, 1.0)
                    for i in range(config.num_classification_labels)
                ]
                one_hot_labels *= tf.constant(label_weights, dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits_cls, axis=-1)
            # <float32>[batch_size]
            per_example_classification_intermediate = -tf.reduce_sum(
                one_hot_labels * log_probs, axis=-1)

            cls_loss = tf.reduce_mean(per_example_classification_intermediate)
            total_loss += cls_loss

        ### Supervised cell selection
        ###############################

        span_indexes = None
        span_logits = None
        if config.span_prediction != SpanPredictionMode.NONE:
            (
                span_indexes,
                span_logits,
                span_loss,
            ) = span_prediction_utils.get_span_logits_by_mode(
                config.span_prediction,
                output_layer,
                label_ids,
                column_ids,
                row_ids,
                max_span_length=10,
            )
            total_loss += span_loss
        elif config.disable_per_token_loss:
            pass
        elif config.mask_examples_without_labels:
            total_loss += tf.reduce_mean(
                span_prediction_utils.compute_masked_example_loss(
                    label_ids,
                    selection_loss_per_example,
                ))
        elif is_supervised:
            total_loss += tf.reduce_mean(selection_loss_per_example)
        else:
            # For the not-supervised case, do not assign a cell selection loss
            # to examples where the model aggregates (aggregate_mask == 1).
            total_loss += tf.reduce_mean(selection_loss_per_example *
                                         (1.0 - aggregate_mask))

        ### Semi-supervised regression loss and supervised loss for aggregations
        #########################################################################

        if do_model_aggregation:
            # Note that `aggregate_mask` is None if the setting is supervised.
            per_example_additional_loss = _calculate_aggregation_loss(
                logits_aggregation, aggregate_mask, aggregation_function_id,
                config)

            if config.use_answer_as_supervision:
                # Add regression loss for numeric answers which require aggregation.
                answer_loss, large_answer_loss_mask = _calculate_regression_loss(
                    answer, aggregate_mask, dist_per_token, numeric_values,
                    numeric_values_scale, table_mask_float, logits_aggregation,
                    config)
                per_example_additional_loss += answer_loss
                # Zero loss for examples with answer_loss > cutoff.
                per_example_additional_loss *= large_answer_loss_mask

            total_loss += tf.reduce_mean(per_example_additional_loss)

        return Outputs(
            total_loss=total_loss,
            logits=logits,
            probs=_get_probs(dist_per_token) * input_mask_float,
            logits_aggregation=logits_aggregation,
            logits_cls=logits_cls,
            span_indexes=span_indexes,
            span_logits=span_logits,
        )
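
One detail that is easy to miss in the branching above: in the semi-supervised branch, aggregate_mask routes the selection loss away from examples where the model chose to aggregate. A toy sketch with made-up per-example losses:

# aggregate_mask is 1 where the model aggregates, 0 where it selects the
# answer directly; only the latter contribute to the selection loss.
selection_loss_per_example = [0.7, 0.2, 0.9]
aggregate_mask = [1.0, 0.0, 1.0]

masked = [loss * (1.0 - m)
          for loss, m in zip(selection_loss_per_example, aggregate_mask)]
print(sum(masked) / len(masked))  # batch mean -> only 0.2 survives
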
Example #8
    def call(self, inputs, **kwargs):
        """Implements call() for the layer."""
        unpacked_inputs = tf_utils.unpack_inputs(inputs)
        word_embeddings = unpacked_inputs[0]
        segment_ids = unpacked_inputs[1]
        column_ids = unpacked_inputs[2]
        row_ids = unpacked_inputs[3]
        prev_label_ids = unpacked_inputs[4]
        column_ranks = unpacked_inputs[5]
        inv_column_ranks = unpacked_inputs[6]
        numeric_relations = unpacked_inputs[7]
        input_shape = tf_utils.get_shape_list(word_embeddings, expected_rank=3)
        batch_size = input_shape[0]
        seq_length = input_shape[1]
        width = input_shape[2]

        output = word_embeddings
        token_type_ids_list = [
            segment_ids, column_ids, row_ids, prev_label_ids, column_ranks,
            inv_column_ranks, numeric_relations
        ]
        token_type_embeddings_list = [
            self.segment_embeddings, self.column_embeddings,
            self.row_embeddings, self.prev_label_embeddings,
            self.column_ranks_embeddings, self.inv_column_ranks_embeddings,
            self.numeric_relations_embeddings
        ]
        if self.use_type_embeddings:
            for i, (token_type_ids, type_embeddings) in enumerate(
                    zip(token_type_ids_list, token_type_embeddings_list)):
                flat_token_type_ids = tf.reshape(token_type_ids, [-1])
                one_hot_ids = tf.one_hot(
                    flat_token_type_ids,
                    depth=self.token_type_vocab_size[i],
                    dtype=self.dtype)
                token_type_embeddings = tf.matmul(
                    one_hot_ids, type_embeddings)
                token_type_embeddings = tf.reshape(token_type_embeddings,
                                                   [batch_size, seq_length, width])
                output += token_type_embeddings

        if self.use_position_embeddings:
            if not self.reset_position_index_per_cell:
                position_embeddings = tf.expand_dims(
                    tf.slice(self.position_embeddings, [0, 0],
                             [seq_length, width]),
                    axis=0)
            else:
                col_index = segmented_tensor.IndexMap(
                    token_type_ids_list[1], self.token_type_vocab_size[1],
                    batch_dims=1)
                row_index = segmented_tensor.IndexMap(
                    token_type_ids_list[2], self.token_type_vocab_size[2],
                    batch_dims=1)
                full_index = segmented_tensor.ProductIndexMap(
                    col_index, row_index)
                position = tf.expand_dims(tf.range(seq_length), axis=0)
                batched_position = tf.repeat(
                    position, repeats=batch_size, axis=0)
                first_position_per_segment = segmented_tensor.reduce_min(
                    batched_position, full_index)[0]
                first_position = segmented_tensor.gather(
                    first_position_per_segment, full_index)
                position_embeddings = tf.nn.embedding_lookup(
                    self.position_embeddings, position - first_position)

            output += position_embeddings

        output = self.output_layer_norm(output)
        output = self.output_dropout(
            output, training=kwargs.get('training', False))

        return output
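
The one-hot matmul used above for the type embeddings is mathematically an embedding lookup: multiplying a one-hot row by the embedding table selects the table row for that id. A quick NumPy check of the equivalence:

import numpy as np

vocab_size, width = 4, 3
table = np.arange(vocab_size * width, dtype=np.float32).reshape(
    vocab_size, width)
ids = np.array([2, 0, 2])
one_hot = np.eye(vocab_size, dtype=np.float32)[ids]
print(np.allclose(one_hot @ table, table[ids]))  # -> True
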