def add_numeric_values_to_questions(interaction):
    """Annotates every question of `interaction` with numeric value spans.

    For each question the match-normalized form of `original_text` is stored
    in `text`, and the spans parsed from that normalized text are copied into
    `annotations`.

    Args:
      interaction: Object exposing an iterable `questions`; each question is
        modified in place.
    """
    for question in interaction.questions:
        normalized_text = text_utils.normalize_for_match(question.original_text)
        question.text = normalized_text
        parsed_spans = number_utils.parse_text(normalized_text)
        numeric_spans = interaction_pb2.NumericValueSpans(spans=parsed_spans)
        question.annotations.CopyFrom(numeric_spans)
def _add_numeric_reference_from_cell(
    cell,
    references,
    row_index,
    column_index,
):
    """Registers number and date references found in a single cell.

    The cell text is normalized and parsed; only spans whose length equals the
    length of the normalized text are considered. Every value of such a span
    (except values that are numerically one) is converted to an identifier and
    handed to `_add_identifier` together with the raw cell text.

    Args:
      cell: Object with a `text` attribute.
      references: Container forwarded unchanged to `_add_identifier`.
      row_index: Forwarded unchanged to `_add_identifier`.
      column_index: Forwarded unchanged to `_add_identifier`.
    """
    normalized_text = text_utils.normalize_for_match(cell.text)
    normalized_length = len(normalized_text)

    for span in number_utils.parse_text(normalized_text):
        # A span is only useful here when it accounts for the entire cell.
        span_length = span.end_index - span.begin_index
        if span_length == normalized_length:
            for value in span.values:
                # One is special because of singular/plural and the pronoun,
                # so it never becomes a reference.
                if not _is_numerically_one(value):
                    identifier, reference_type = _to_identifier(
                        normalized_text, value)
                    _add_identifier(
                        identifier,
                        reference_type,
                        cell.text,
                        references,
                        row_index,
                        column_index,
                    )
Пример #3
0
 def test_parse_range(self):
     # A year range is expected to yield one span per year, each carrying both
     # a float value and a year-only date.
     year_range = '2005-2010'
     expected_text_proto = """
   spans {
     begin_index: 0
     end_index: 4
     values {
       float_value: 2005.
     }
     values {
       date {
         year: 2005
       }
     }
   }
   spans {
     begin_index: 5
     end_index: 9
     values {
       float_value: 2010.
     }
     values {
       date {
         year: 2010
       }
     }
   }
   """
     expected = _get_spans(expected_text_proto)
     actual = number_utils.parse_text(year_range)
     self.assertEqual(expected, actual)
Пример #4
0
 def test_dont_parse_weird_examples(self):
     # Malformed number-like strings must still produce at least one span, but
     # none of the spans may cover the entire input.
     weird_texts = ('1....', '1,,,,,,', '10000,..', '-.1,,,,00,20')
     for text in weird_texts:
         parsed = number_utils.parse_text(text)
         self.assertNotEmpty(parsed)
         whole_text_bounds = (0, len(text))
         for span in parsed:
             span_bounds = (span.begin_index, span.end_index)
             self.assertNotEqual(span_bounds, whole_text_bounds, text)
Пример #5
0
 def test_parse_complete_dates(self, text, day, month, year):
     # The full text must parse as exactly one span holding a single date value
     # built from the given day, month and year.
     expected_date = interaction_pb2.Date(year=year, month=month, day=day)
     expected_value = interaction_pb2.NumericValue(date=expected_date)
     expected_span = interaction_pb2.NumericValueSpan(
         begin_index=0,
         end_index=len(text),
         values=[expected_value],
     )
     actual = number_utils.parse_text(text)
     self.assertEqual([expected_span], actual)
Пример #6
0
 def test_parse_text(self):
     # Mixed sentence containing plain numbers, a currency amount, month/year
     # dates and a bare year; each should come back as its own span.
     sentence = '1,000,000, $10,000 November 1846 to February 1847 in 1908'
     expected_text_proto = """
   spans {
     begin_index: 0
     end_index: 9
     values {
       float_value: 1000000.0
     }
   }
   spans {
     begin_index: 12
     end_index: 18
     values {
       float_value: 10000.0
     }
   }
   spans {
     begin_index: 19
     end_index: 32
     values {
       date {
         year: 1846
         month: 11
       }
     }
   }
  spans {
     begin_index: 36
     end_index: 49
     values {
       date {
         year: 1847
         month: 2
       }
     }
 }
 spans {
   begin_index: 53
   end_index: 57
   values {
     float_value: 1908.0
   }
   values {
     date {
       year: 1908
     }
   }
 }"""
     expected = _get_spans(expected_text_proto)
     actual = number_utils.parse_text(sentence)
     self.assertEqual(expected, actual)
def _get_question_references(question):
    """Collects numeric and entity references mentioned in a question.

    Numeric references come from parsing the match-normalized
    `original_text`; entity references come from the annotated-text extension
    attached to the question. Both kinds are recorded through
    `_add_identifier`.

    Args:
      question: Object with `original_text` and an `Extensions` mapping that
        holds the `annotated_question_ext` annotations.

    Returns:
      The dict that was passed to `_add_identifier` for every reference.
    """
    references = {}
    original_text = question.original_text

    # NOTE(review): span indices are computed on the normalized text but are
    # used to slice the original text below; this presumably relies on
    # normalization preserving character offsets -- confirm.
    normalized_text = text_utils.normalize_for_match(original_text)
    for span in number_utils.parse_text(normalized_text):
        begin, end = span.begin_index, span.end_index
        mention = original_text[begin:end]
        for value in span.values:
            # One is special because of singular/plural and the pronoun.
            if not _is_numerically_one(value):
                identifier, reference_type = _to_identifier(mention, value)
                _add_identifier(
                    identifier,
                    reference_type,
                    mention,
                    references,
                    begin,
                    end,
                )

    extension_key = annotated_text_pb2.AnnotatedText.annotated_question_ext
    for annotation in question.Extensions[extension_key].annotations:
        # NOTE(review): these are named byte indices yet index a str; verify
        # they line up for non-ASCII questions.
        begin = annotation.begin_byte_index
        end = annotation.end_byte_index
        _add_identifier(
            annotation.identifier,
            ReferenceType.ENTITY,
            original_text[begin:end],
            references,
            begin,
            end,
        )

    return references
def _get_numeric_values(text):
    """Returns an iterator over every numeric value parsed from `text`.

    The values of all spans found by `number_utils.parse_text` are chained
    together in span order.
    """
    # Materialize the per-span value groups up front, mirroring the eager
    # argument unpacking of the chained call.
    value_groups = [span.values for span in number_utils.parse_text(text)]
    return itertools.chain(*value_groups)
Пример #9
0
 def test_parse_numerals(self, text):
     # The whole text is expected to parse as a single span whose only value
     # is the float 12.
     twelve = interaction_pb2.NumericValue(float_value=12)
     expected_span = interaction_pb2.NumericValueSpan(
         begin_index=0,
         end_index=len(text),
         values=[twelve],
     )
     actual = number_utils.parse_text(text)
     self.assertEqual([expected_span], actual)
Пример #10
0
 def test_ignore_nans(self):
     # Strings that float() would accept as special values (or that merely
     # start like one) must not produce any numeric span.
     for text in ('inf', 'nan', 'Nan Hayworth'):
         self.assertEmpty(number_utils.parse_text(text))