Exemplo n.º 1
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     The schema declares string ``id``, ``context`` and ``question`` columns
     plus an ``answers`` sequence of ``text`` / ``answer_start`` entries. A
     ``QuestionAnsweringExtractive`` task template is attached over the
     ``question``, ``context`` and ``answers`` columns.
     """
     answer_spans = datasets.features.Sequence(
         {
             "text": datasets.Value("string"),
             "answer_start": datasets.Value("int32"),
         }
     )
     schema = datasets.Features(
         {
             "id": datasets.Value("string"),
             "context": datasets.Value("string"),
             "question": datasets.Value("string"),
             "answers": answer_spans,
         }
     )
     qa_template = QuestionAnsweringExtractive(
         question_column="question",
         context_column="context",
         answers_column="answers",
     )
     return datasets.DatasetInfo(
         # Text shown as the description on the datasets page.
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://github.com/deepmind/xquad",
         citation=_CITATION,
         task_templates=[qa_template],
     )
Exemplo n.º 2
0
 def test_column_mapping(self):
     """Check that custom column names map back to the canonical ones."""
     template = QuestionAnsweringExtractive(
         context_column="input_context",
         question_column="input_question",
         answers_column="input_answers",
     )
     expected_mapping = {
         "input_context": "context",
         "input_question": "question",
         "input_answers": "answers",
     }
     self.assertDictEqual(expected_mapping, template.column_mapping)
Exemplo n.º 3
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Columns are ``id``, ``title``, ``context`` and ``question`` (all
     strings) plus an ``answers`` sequence of ``text`` / ``answer_start``
     entries. A ``QuestionAnsweringExtractive`` task template is attached.
     """
     columns = {
         "id": datasets.Value("string"),
         "title": datasets.Value("string"),
         "context": datasets.Value("string"),
         "question": datasets.Value("string"),
         "answers": datasets.features.Sequence(
             {
                 "text": datasets.Value("string"),
                 "answer_start": datasets.Value("int32"),
             }
         ),
     }
     extractive_qa = QuestionAnsweringExtractive(
         question_column="question",
         context_column="context",
         answers_column="answers",
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features(columns),
         # Both question and context are inputs, so there is no default
         # (input, target) pair.
         supervised_keys=None,
         homepage="https://rajpurkar.github.io/SQuAD-explorer/",
         citation=_CITATION,
         task_templates=[extractive_qa],
     )
Exemplo n.º 4
0
    def test_map_on_task_template(self):
        """Check how ``Dataset.map`` treats the task templates stored in the info.

        A map that returns examples unchanged must keep the templates; a map
        that adds a field under ``answers`` must leave an empty template list.
        Each case runs twice so the second call goes through the cached result.
        """
        single_row = {key: [value] for key, value in SAMPLE_QUESTION_ANSWERING_EXTRACTIVE.items()}
        # A bare template (not wrapped in a list) is passed on purpose; the
        # assertions below expect the info to expose it as a one-element list.
        dataset = Dataset.from_dict(single_row, info=DatasetInfo(task_templates=QuestionAnsweringExtractive()))
        templates = dataset.info.task_templates
        assert isinstance(templates, list)
        assert len(templates) == 1

        def keep_task(x):
            return x

        def dont_keep_task(x):
            modified = deepcopy(SAMPLE_QUESTION_ANSWERING_EXTRACTIVE)
            modified["answers"]["foobar"] = 0
            return modified

        # First pass computes the mapping, second pass reloads it from cache.
        for _ in range(2):
            mapped_dataset = dataset.map(keep_task)
            assert mapped_dataset.info.task_templates == dataset.info.task_templates

        # Same two passes for the mapping that alters the "answers" column.
        for _ in range(2):
            mapped_dataset = dataset.map(dont_keep_task)
            assert mapped_dataset.info.task_templates == []
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     ``id`` is an ``int32`` column; ``title``, ``context`` and ``question``
     are strings; ``answers`` is a sequence of ``text`` / ``answer_start``
     entries. A ``QuestionAnsweringExtractive`` task template is attached.
     """
     answers_feature = datasets.features.Sequence(
         {"text": datasets.Value("string"), "answer_start": datasets.Value("int32")}
     )
     dataset_features = datasets.Features(
         {
             "id": datasets.Value("int32"),
             "title": datasets.Value("string"),
             "context": datasets.Value("string"),
             "question": datasets.Value("string"),
             "answers": answers_feature,
         }
     )
     template = QuestionAnsweringExtractive(
         question_column="question",
         context_column="context",
         answers_column="answers",
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=dataset_features,
         supervised_keys=None,
         homepage="",
         citation=_CITATION,
         task_templates=[template],
     )
Exemplo n.º 6
0
 def _info(self):
     """Return the ``ds.DatasetInfo`` describing this dataset.

     Description, citation, homepage and license are all empty strings.
     The schema has string ``id``, ``title``, ``context`` and ``question``
     columns and an ``answers`` sequence of ``text`` / ``answer_start``
     entries, with a ``QuestionAnsweringExtractive`` task template attached.
     """
     answer_entry = {
         "text": ds.Value("string"),
         "answer_start": ds.Value("int32"),
     }
     schema = ds.Features(
         {
             "id": ds.Value("string"),
             "title": ds.Value("string"),
             "context": ds.Value("string"),
             "question": ds.Value("string"),
             "answers": ds.features.Sequence(answer_entry),
         }
     )
     qa_task = QuestionAnsweringExtractive(
         question_column="question",
         context_column="context",
         answers_column="answers",
     )
     return ds.DatasetInfo(
         description="",
         citation="",
         homepage="",
         license="",
         features=schema,
         task_templates=[qa_task],
     )
Exemplo n.º 7
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     The schema has ``int32`` ``document_id`` and ``id`` columns, string
     ``context`` and ``question`` columns, a boolean ``is_impossible`` flag
     and an ``answers`` sequence of ``text`` / ``answer_start`` entries. A
     ``QuestionAnsweringExtractive`` task template is attached.
     """
     answers_feature = datasets.features.Sequence(
         {
             "text": datasets.Value("string"),
             "answer_start": datasets.Value("int32"),
         }
     )
     columns = {
         "document_id": datasets.Value("int32"),
         "context": datasets.Value("string"),
         "question": datasets.Value("string"),
         "is_impossible": datasets.Value("bool"),
         "id": datasets.Value("int32"),
         "answers": answers_feature,
     }
     qa_template = QuestionAnsweringExtractive(
         question_column="question",
         context_column="context",
         answers_column="answers",
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features(columns),
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[qa_template],
     )
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     All scalar columns are strings: ``squad_v2_id``, ``original question``,
     ``disfluent question``, ``title`` and ``context``. ``answers`` is a
     sequence of ``text`` / ``answer_start`` entries. The attached
     ``QuestionAnsweringExtractive`` template takes its question from the
     ``disfluent question`` column.
     """
     string_columns = (
         "squad_v2_id",
         "original question",
         "disfluent question",
         "title",
         "context",
     )
     # Insertion order matches the declaration order above, with "answers" last.
     columns = {name: datasets.Value("string") for name in string_columns}
     columns["answers"] = datasets.features.Sequence(
         {
             "text": datasets.Value("string"),
             "answer_start": datasets.Value("int32"),
         }
     )
     schema = datasets.Features(columns)

     disfluent_qa = QuestionAnsweringExtractive(
         question_column="disfluent question",
         context_column="context",
         answers_column="answers",
     )
     return datasets.DatasetInfo(
         # Text shown as the description on the datasets page.
         description=_DESCRIPTION,
         # Column names and types, built above.
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[disfluent_qa],
     )
Exemplo n.º 9
0
 def test_from_dict(self):
     """Check the task name and schemas of a template built with ``from_dict``."""
     template = QuestionAnsweringExtractive.from_dict(
         {
             "context_column": "input_input_context",
             "question_column": "input_question",
             "answers_column": "input_answers",
         }
     )

     # The schemas are expressed in canonical column names, independent of
     # the custom column names given to ``from_dict``.
     expected_inputs = Features({"question": Value("string"), "context": Value("string")})
     answer_fields = {
         "text": Value("string"),
         "answer_start": Value("int32"),
     }
     expected_labels = Features({"answers": Sequence(answer_fields)})

     self.assertEqual("question-answering-extractive", template.task)
     self.assertEqual(expected_inputs, template.input_schema)
     self.assertEqual(expected_labels, template.label_schema)
Exemplo n.º 10
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` for the selected configuration.

     ``primary_task`` declares passage candidates, annotations and document
     metadata, with no task template. ``secondary_task`` declares ``id``,
     ``title``, ``context``, ``question`` and ``answers`` columns and
     attaches a ``QuestionAnsweringExtractive`` task template.

     Any other configuration name falls through and returns ``None``.
     """
     config_name = self.config.name

     if config_name == "primary_task":
         candidate_spans = datasets.features.Sequence(
             {
                 "plaintext_start_byte": datasets.Value("int32"),
                 "plaintext_end_byte": datasets.Value("int32"),
             }
         )
         # "annotation_id" (datasets.Value("variant")) is not declared.
         annotation_fields = datasets.features.Sequence(
             {
                 "passage_answer_candidate_index": datasets.Value("int32"),
                 "minimal_answers_start_byte": datasets.Value("int32"),
                 "minimal_answers_end_byte": datasets.Value("int32"),
                 "yes_no_answer": datasets.Value("string"),
             }
         )
         # "example_id" (datasets.Value("variant")) is not declared either.
         primary_schema = datasets.Features(
             {
                 "passage_answer_candidates": candidate_spans,
                 "question_text": datasets.Value("string"),
                 "document_title": datasets.Value("string"),
                 "language": datasets.Value("string"),
                 "annotations": annotation_fields,
                 "document_plaintext": datasets.Value("string"),
                 "document_url": datasets.Value("string"),
             }
         )
         return datasets.DatasetInfo(
             # Text shown as the description on the datasets page.
             description=_DESCRIPTION,
             features=primary_schema,
             # No (input, target) pair is declared for this configuration.
             supervised_keys=None,
             homepage="https://github.com/google-research-datasets/tydiqa",
             citation=_CITATION,
         )

     if config_name == "secondary_task":
         answer_spans = datasets.features.Sequence(
             {
                 "text": datasets.Value("string"),
                 "answer_start": datasets.Value("int32"),
             }
         )
         secondary_schema = datasets.Features(
             {
                 "id": datasets.Value("string"),
                 "title": datasets.Value("string"),
                 "context": datasets.Value("string"),
                 "question": datasets.Value("string"),
                 "answers": answer_spans,
             }
         )
         qa_template = QuestionAnsweringExtractive(
             question_column="question",
             context_column="context",
             answers_column="answers",
         )
         return datasets.DatasetInfo(
             description=_DESCRIPTION,
             features=secondary_schema,
             # Both question and context are inputs, so there is no default
             # (input, target) pair.
             supervised_keys=None,
             homepage="https://github.com/google-research-datasets/tydiqa",
             citation=_CITATION,
             task_templates=[qa_template],
         )