Exemplo n.º 1
0
 def _generate_examples(self, data_path: str, mapping_path: str):
     """Yields the SQuAD-like examples whose key appears in the mapping file.

     Args:
       data_path: Path handed to `qa_utils.generate_squadlike_examples`.
       mapping_path: Path to a text file holding one example key per line.

     Yields:
       `(key, example)` pairs, in the order produced by
       `qa_utils.generate_squadlike_examples`, restricted to keys listed in
       the mapping file.
     """
     with tf.io.gfile.GFile(mapping_path, "r") as mapping_file:
         # A set gives constant-time membership checks in the loop below.
         allowed_keys = set(mapping_file.read().splitlines())
     for key, example in qa_utils.generate_squadlike_examples(data_path):
         if key not in allowed_keys:
             continue
         yield key, example
Exemplo n.º 2
0
    def test_generate_squadlike_examples(self):
        """Checks the parsed output for the fake xquad `translate-test.json`."""
        fixture_path = os.path.join(testing.test_utils.fake_examples_dir(),
                                    'xquad', 'translate-test.json')
        actual = list(qa_utils.generate_squadlike_examples(fixture_path))

        # Expected (key, example) pairs, in the order the generator emits them.
        # The context strings mirror the fixture file verbatim.
        expected = [
            ('1', {
                'id': '1',
                'title': 'Zurich_Switzerland',
                'context': (
                    'Zurich is the largest city in Switzerland with over 400000 '
                    'inhabitants. In spite of this, it is not the capital of '
                    'Switzerland, which is located in Bern aka Bernie.'),
                'question': 'What is the capital of Switzerland?',
                'answers': {
                    'answer_start': [1, 20, 29],
                    'text': ['Zurich', 'Bern', 'Bernie'],
                },
            }),
            ('2', {
                'id': '2',
                'title': 'Zurich_Switzerland',
                'context': (
                    'Switzerland is the country in Euriope with 26 cantons. Zurich '
                    'canton has the largest population of 1.5 million.'),
                'question': 'How many cantons does Switzerland have?',
                'answers': {
                    'answer_start': [8],
                    'text': ['26'],
                },
            }),
            ('3', {
                'id': '3',
                'title': 'Paris_France',
                'context': (
                    'Paris is the largest city in France with over 2 million '
                    'inhabitants. It is the capital of France.'),
                'question': 'What is the capital of France?',
                'answers': {
                    'answer_start': [1, 7],
                    'text': ['Paris', 'France'],
                },
            }),
        ]

        self.assertEqual(actual, expected)
Exemplo n.º 3
0
    def _generate_examples(self,
                           squad_data_path,
                           mapping_path=None,
                           qgen_data_path=None):
        r"""Yields question generation examples.

        Which inputs are read depends on `self.builder_config.name`:

        * "split_du": only `squad_data_path` is read. The JSON document is
          iterated directly as a sequence of articles, and one example is
          yielded per question, keyed by the question id and using the text
          of its first listed answer.
        * "split_zhou": all three paths are read. The mapping file and the
          question generation file are walked line by line in lockstep; the
          key is the zero-based line index left-padded with zeros to 7 digits.

        Any other config name yields nothing.

        Args:
          squad_data_path: Path to SQuAD json file.
          mapping_path:  File with SQuAD map id for the example in Zhou et al.
            splits.
          qgen_data_path:  File with examples in "TokenizedInputSentence\t
            AnswerStartAndEndPosition\tParsingTreeOfInputSentence\t
            PoSTagOfInputSentence\tNERTagsOfInputSentence\tTokenizedQuestion\t
            UntokenizedInputSentence\tAnswerStartCharIndex\tAnswer\t
            UntokenizedQuestion" format per line, for the Zhou et al. splits.

        Yields:
          key and example dict.
        """
        config_name = self.builder_config.name

        if config_name == "split_du":
            # The file format slightly differs from the original SQuAD JSON format.
            with tf.io.gfile.GFile(squad_data_path) as squad_file:
                articles = json.load(squad_file)
                for article in articles:
                    for paragraph in article["paragraphs"]:
                        passage = paragraph["context"]
                        for qa in paragraph["qas"]:
                            example = {
                                _CONTEXT_PASSAGE: passage,
                                _ANSWER: qa["answers"][0]["text"],
                                _QUESTION: qa["question"],
                            }
                            yield qa["id"], example

        elif config_name == "split_zhou":
            # Index every SQuAD example by id so each mapping line can look up
            # its full passage.
            squad_by_id = dict(
                qa_utils.generate_squadlike_examples(squad_data_path))

            with tf.io.gfile.GFile(mapping_path, "r") as mapping_file, \
                    tf.io.gfile.GFile(qgen_data_path, "r") as qgen_data_file:
                squad_ids = mapping_file.read().splitlines()
                qgen_lines = qgen_data_file.read().splitlines()

                # Line i of the mapping file names the SQuAD example that
                # line i of the question generation file was derived from.
                for index, (squad_id, line) in enumerate(
                        zip(squad_ids, qgen_lines)):
                    # Exactly ten tab-separated columns are expected; only the
                    # untokenized sentence, the answer and the untokenized
                    # question are used.
                    (_, _, _, _, _, _, sentence, _, answer_text,
                     question_text) = line.split("\t")
                    passage = squad_by_id[squad_id]["context"]
                    example = {
                        _CONTEXT_SENTENCE: sentence,
                        _CONTEXT_PASSAGE: passage,
                        _ANSWER: answer_text,
                        _QUESTION: question_text,
                    }
                    yield str(index).zfill(7), example
Exemplo n.º 4
0
 def _generate_examples(self, filepath):
     """Returns the examples produced by `qa_utils.generate_squadlike_examples`.

     Args:
       filepath: Forwarded unchanged to `qa_utils.generate_squadlike_examples`;
         presumably the path to a SQuAD-format JSON file (confirm against
         the helper).

     Returns:
       Whatever `qa_utils.generate_squadlike_examples(filepath)` returns.
     """
     return qa_utils.generate_squadlike_examples(filepath)
Exemplo n.º 5
0
    def _generate_examples(self, filepath):
        """Returns the example generator matching the selected builder config.

        Args:
          filepath: Path forwarded unchanged to the chosen generator.

        Returns:
          The result of `qa_utils.generate_squadlike_examples(filepath)` when
          the builder config is named "v1.1"; otherwise the result of
          `_generate_v2_examples(filepath)`.
        """
        # Every config other than "v1.1" is handled by the v2 generator.
        if self.builder_config.name != "v1.1":
            return _generate_v2_examples(filepath)
        return qa_utils.generate_squadlike_examples(filepath)