def add_numeric_values_to_questions(interaction):
    """Normalizes every question and attaches its parsed numeric spans.

    For each entry of `interaction.questions`, `text` is overwritten with the
    normalized form of `original_text`, and `annotations` is replaced by a
    `NumericValueSpans` message built from parsing that normalized text.
    The interaction is modified in place; nothing is returned.
    """
    for question in interaction.questions:
        normalized = text_utils.normalize_for_match(question.original_text)
        question.text = normalized

        # Parse the text that was just stored on the question.
        parsed_spans = number_utils.parse_text(question.text)
        span_message = interaction_pb2.NumericValueSpans(spans=parsed_spans)
        question.annotations.CopyFrom(span_message)
def _add_numeric_reference_from_cell(
    cell,
    references,
    row_index,
    column_index,
):
    """Registers number and date references for a fully numeric cell.

    The cell text is normalized and parsed into spans. Only spans whose
    length equals the length of the whole normalized text are used. Each of
    their values, except those that are numerically one, is converted to an
    identifier and recorded in `references` via `_add_identifier`, together
    with the raw cell text and the cell coordinates.
    """
    normalized = text_utils.normalize_for_match(cell.text)

    # Partial matches are dropped: the span has to cover the entire cell.
    whole_cell_spans = (
        span for span in number_utils.parse_text(normalized)
        if span.end_index - span.begin_index == len(normalized))

    for span in whole_cell_spans:
        # One is special because of singular/plural and the pronoun, so it
        # is never turned into a reference.
        usable_values = (
            value for value in span.values
            if not _is_numerically_one(value))

        for value in usable_values:
            identifier, reference_type = _to_identifier(normalized, value)
            _add_identifier(
                identifier,
                reference_type,
                cell.text,
                references,
                row_index,
                column_index,
            )
def _get_column_values(table, col_index):
    """Maps each row index to the numeric values parsed from one column.

    For every row of `table.rows`, the text of the cell at `col_index` is
    normalized and handed to `_get_numeric_values`; the results are
    materialized as a list and keyed by the row's position in the table.
    """
    return {
        row_index: list(
            _get_numeric_values(
                text_utils.normalize_for_match(row.cells[col_index].text)))
        for row_index, row in enumerate(table.rows)
    }
Exemplo n.º 4
0
def _add_text_fn(element):
    """Returns a cleaned copy of a keyed interaction with question text set.

    `element` is a `(key, interaction)` pair. The interaction is copied so
    the input is left untouched. Invalid unicode is filtered from the copy's
    table, and each question's `text` is filled with the normalized
    `original_text`. The result is the pair `(key, copied_interaction)`.
    """
    key, source = element

    copied = interaction_pb2.Interaction()
    copied.CopyFrom(source)

    text_utils.filter_invalid_unicode_from_table(copied.table)
    for question in copied.questions:
        normalized = text_utils.normalize_for_match(question.original_text)
        question.text = normalized

    return key, copied
Exemplo n.º 5
0
def _get_question_cost(
    tokenizer,
    question,
):
    """Returns the encoding cost of a question's normalized original text.

    The question's `original_text` is normalized, tokenized with
    `tokenizer.tokenize`, and the tokens are passed to
    `tokenizer.question_encoding_cost`, whose result is returned. The
    original docstring describes that result as the serialized question
    length including the special token offset.
    """
    normalized_text = text_utils.normalize_for_match(question.original_text)
    question_tokens = tokenizer.tokenize(normalized_text)
    return tokenizer.question_encoding_cost(question_tokens)
Exemplo n.º 6
0
 def get_interaction(interaction, table, statement, result, name):
     """Builds a single-question interaction for a generated statement.

     Args:
       interaction: Source interaction; provides the id (falling back to its
         table id when the id is empty) and the table id.
       table: Table object converted to a proto with `_to_table_proto`.
       statement: Object whose `verbalize()` result becomes the question's
         original text.
       result: `EvaluationResult` of the statement; TRUE maps to class
         index 1 and FALSE to class index 0.
       name: Suffix appended to the interaction id (after an underscore) to
         form the question id.

     Returns:
       A new `interaction_pb2.Interaction` holding the table and exactly one
       question.

     Raises:
       ValueError: If `result` is neither TRUE nor FALSE.
     """
     new_interaction = interaction_pb2.Interaction()
     if interaction.id:
         new_interaction.id = interaction.id
     else:
         new_interaction.id = interaction.table.table_id
     new_interaction.table.CopyFrom(_to_table_proto(table))
     # Keep the id of the source table on the converted table.
     new_interaction.table.table_id = interaction.table.table_id
     new_question = new_interaction.questions.add()
     new_question.id = new_interaction.id + '_' + name
     new_question.original_text = statement.verbalize()
     new_question.text = text_utils.normalize_for_match(
         new_question.original_text)
     if result == EvaluationResult.TRUE:
         new_question.answer.class_index = 1
     elif result == EvaluationResult.FALSE:
         new_question.answer.class_index = 0
     else:
         # The message must be an f-string so the offending value is shown.
         raise ValueError(f'Unexpected: {result}')
     return new_interaction
def _get_question_references(question):
    """Collects the numeric and entity references found in a question.

    Numeric references come from parsing the normalized `original_text`;
    values that are numerically one are skipped. Entity references come from
    the annotations stored in the question's `annotated_question_ext`
    extension. Every reference is recorded through `_add_identifier`, and
    the resulting `references` dict is returned.
    """
    references = {}
    original_text = question.original_text

    numeric_spans = number_utils.parse_text(
        text_utils.normalize_for_match(original_text))

    for span in numeric_spans:
        begin, end = span.begin_index, span.end_index
        for value in span.values:
            # One is special because of singular/plural and the pronoun.
            if _is_numerically_one(value):
                continue

            # NOTE(review): the span offsets were computed on the normalized
            # text but are applied to the original text here; this assumes
            # normalization keeps character offsets unchanged -- confirm.
            surface = original_text[begin:end]
            identifier, reference_type = _to_identifier(surface, value)
            _add_identifier(
                identifier,
                reference_type,
                surface,
                references,
                begin,
                end,
            )

    extension_key = annotated_text_pb2.AnnotatedText.annotated_question_ext
    entity_annotations = question.Extensions[extension_key].annotations
    for annotation in entity_annotations:
        begin = annotation.begin_byte_index
        end = annotation.end_byte_index
        # NOTE(review): byte offsets are used to slice a text string;
        # presumably fine for ASCII-only questions -- verify for others.
        _add_identifier(
            annotation.identifier,
            ReferenceType.ENTITY,
            original_text[begin:end],
            references,
            begin,
            end,
        )

    return references
Exemplo n.º 8
0
 def test_normalize_for_match_lowercases(self):
     """Checks that normalize_for_match turns an all-caps word lowercase."""
     normalized = text_utils.normalize_for_match("LOWERCASE")
     self.assertEqual("lowercase", normalized)