Пример #1
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the blended_skill_talk dataset.

     ``additional_context`` and ``context`` are plain strings; ``personas``,
     ``previous_utterance``, ``free_messages`` and ``guided_messages`` are
     sequences of strings; ``suggestions`` is a sequence over a dict with
     one string field per entry of ``_TASK``.
     """
     suggestion_record = {task: nlp.Value("string") for task in _TASK}
     schema = nlp.Features({
         "personas": nlp.features.Sequence(nlp.Value("string")),
         "additional_context": nlp.Value("string"),
         "previous_utterance": nlp.features.Sequence(nlp.Value("string")),
         "context": nlp.Value("string"),
         "free_messages": nlp.features.Sequence(nlp.Value("string")),
         "guided_messages": nlp.features.Sequence(nlp.Value("string")),
         "suggestions": nlp.features.Sequence(suggestion_record),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://parl.ai/projects/bst/",
         citation=_CITATION,
     )
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the race dataset.

     ``article``, ``answer`` and ``question`` are plain strings;
     ``options`` is a sequence over a dict with a single string field
     ``option``.
     """
     feature_spec = {
         name: nlp.Value("string")
         for name in ("article", "answer", "question")
     }
     feature_spec["options"] = nlp.features.Sequence(
         {"option": nlp.Value("string")})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="http://www.cs.cmu.edu/~glai1/data/race/",
         citation=_CITATION,
     )
Пример #3
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the wiqa dataset.

     The schema lists the question fields first, then ``choices`` (a
     sequence over a dict of ``text``/``label`` strings), then the
     ``metadata_*`` fields, all strings except ``metadata_path_len``
     which is ``int32``.
     """
     choice_record = {
         "text": nlp.Value("string"),
         "label": nlp.Value("string"),
     }
     metadata_columns = (
         ("metadata_question_id", "string"),
         ("metadata_graph_id", "string"),
         ("metadata_para_id", "string"),
         ("metadata_question_type", "string"),
         ("metadata_path_len", "int32"),
     )
     feature_spec = {
         "question_stem": nlp.Value("string"),
         "question_para_step": nlp.features.Sequence(nlp.Value("string")),
         "answer_label": nlp.Value("string"),
         "answer_label_as_choice": nlp.Value("string"),
         "choices": nlp.features.Sequence(choice_record),
     }
     # Metadata fields are appended last so the declared order is kept.
     for name, dtype in metadata_columns:
         feature_spec[name] = nlp.Value(dtype)
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://allenai.org/data/wiqa",
         citation=_CITATION,
     )
Пример #4
0
def read_batch_unformated(feats, tmp_dir):
    """Read all of ``beta.arrow`` in consecutive slices of 10 rows.

    Args:
        feats: features handed to ``nlp.DatasetInfo(features=...)``.
        tmp_dir: directory that contains the ``beta.arrow`` file.

    Each slice is fetched and discarded; the function returns ``None``.
    """
    batch_size = 10
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    dataset = nlp.Dataset.from_file(
        filename=arrow_path, info=nlp.DatasetInfo(features=feats))
    total_rows = len(dataset)
    start = 0
    while start < total_rows:
        _ = dataset[start:start + batch_size]
        start += batch_size
Пример #5
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the coqa dataset.

     ``source`` and ``story`` are strings, ``questions`` is a sequence of
     strings, and ``answers`` is a sequence over a dict holding the answer
     text plus ``int32`` start and end offsets.
     """
     answer_record = {
         "input_text": nlp.Value("string"),
         "answer_start": nlp.Value("int32"),
         "answer_end": nlp.Value("int32"),
     }
     schema = nlp.Features({
         "source": nlp.Value("string"),
         "story": nlp.Value("string"),
         "questions": nlp.features.Sequence(nlp.Value("string")),
         "answers": nlp.features.Sequence(answer_record),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://stanfordnlp.github.io/coqa/",
         citation=_CITATION,
     )
Пример #6
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the x_stance dataset.

     Every field is a scalar ``nlp.Value``; the (name, dtype) table below
     fixes both the field order and each field's dtype.
     """
     columns = (
         ("question", "string"),
         ("id", "int32"),
         ("question_id", "int32"),
         ("language", "string"),
         ("comment", "string"),
         ("label", "string"),
         ("numerical_label", "int32"),
         ("author", "string"),
         ("topic", "string"),
     )
     schema = nlp.Features(
         {name: nlp.Value(dtype) for name, dtype in columns})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://github.com/ZurichNLP/xstance",
         citation=_CITATION,
     )
Пример #7
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the math_qa dataset.

     All seven fields are declared as string values, in the order listed
     in ``string_columns``.
     """
     string_columns = (
         "Problem",
         "Rationale",
         "options",
         "correct",
         "annotated_formula",
         "linear_formula",
         "category",
     )
     schema = nlp.Features(
         {name: nlp.Value("string") for name in string_columns})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://math-qa.github.io/math-QA/",
         citation=_CITATION,
     )
Пример #8
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the cosmos_qa dataset.

     ``id``, ``context``, ``question`` and the four ``answerN`` fields are
     strings; ``label`` is an ``int32`` and is declared last.
     """
     string_columns = (
         "id",
         "context",
         "question",
         "answer0",
         "answer1",
         "answer2",
         "answer3",
     )
     feature_spec = {name: nlp.Value("string") for name in string_columns}
     feature_spec["label"] = nlp.Value("int32")
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://wilburone.github.io/cosmos/",
         citation=_CITATION,
     )
Пример #9
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the openBookQA dataset.

     ``id``, ``question_stem`` and ``answerKey`` are strings; ``choices``
     is a sequence over a dict of ``text``/``label`` strings.
     """
     choice_record = {
         'text': nlp.Value('string'),
         'label': nlp.Value('string'),
     }
     schema = nlp.Features({
         'id': nlp.Value('string'),
         'question_stem': nlp.Value('string'),
         'choices': nlp.features.Sequence(choice_record),
         'answerKey': nlp.Value('string'),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage='https://allenai.org/data/open-book-qa',
         citation=_CITATION,
     )
Пример #10
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the active configuration.

     The description is ``_DESCRIPTION``, a newline, then the active
     config's own description. All scalar fields are strings except
     ``show_number`` (``int32``); ``search_results`` is a sequence over a
     dict of four string fields.
     """
     search_result_record = {
         'urls': nlp.Value('string'),
         "snippets": nlp.Value('string'),
         "titles": nlp.Value('string'),
         "related_links": nlp.Value('string'),
     }
     return nlp.DatasetInfo(
         description=_DESCRIPTION + '\n' + self.config.description,
         features=nlp.Features(
             {
                 # Each key is declared exactly once: a repeated key in a
                 # dict literal is silently collapsed into a single entry.
                 "category": nlp.Value("string"),
                 "air_date": nlp.Value("string"),
                 "question": nlp.Value("string"),
                 "value": nlp.Value("string"),
                 "answer": nlp.Value("string"),
                 "round": nlp.Value("string"),
                 "show_number": nlp.Value("int32"),
                 "search_results": nlp.features.Sequence(
                     search_result_record),
             }
         ),
         homepage="https://github.com/nyu-dl/dl4ir-searchQA",
         citation=_CITATION,
     )
Пример #11
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` (schema, homepage, citation).

     ``id``, ``title``, ``context`` and ``question`` are strings;
     ``answers`` is a sequence over a dict with the answer ``text`` and an
     ``int32`` ``answer_start``.
     """
     feature_spec = {
         name: nlp.Value("string")
         for name in ("id", "title", "context", "question")
     }
     feature_spec["answers"] = nlp.features.Sequence({
         "text": nlp.Value("string"),
         "answer_start": nlp.Value("int32"),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # supervised_keys stays unset: both question and context have to
         # be passed as input.
         supervised_keys=None,
         homepage="https://github.com/husseinmozannar/SOQAL/tree/master/data",
         citation=_CITATION,
     )
Пример #12
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the reclor dataset.

     ``context``, ``question``, ``label`` and ``id_string`` are strings;
     ``answers`` is a sequence of strings.
     """
     schema = nlp.Features({
         "context": nlp.Value("string"),
         "question": nlp.Value("string"),
         "answers": nlp.features.Sequence(nlp.Value("string")),
         "label": nlp.Value("string"),
         "id_string": nlp.Value("string"),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="http://whyu.me/reclor/",
         citation=_CITATION,
     )
Пример #13
0
 def _info(self):
     """Return a minimal ``nlp.DatasetInfo`` with one nested feature.

     The only feature, ``a``, is a sequence over a dict with a single
     string field ``b``. No supervised keys are declared.
     """
     inner_record = {"b": nlp.Value("string")}
     schema = nlp.Features({"a": nlp.Sequence(inner_record)})
     return nlp.DatasetInfo(features=schema, supervised_keys=None)
Пример #14
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` derived from the active config.

     The description is ``_DESCRIPTION`` immediately followed by the
     config's description (no separator), and the features are built
     from ``self.config.features``.
     """
     config = self.config
     full_description = _DESCRIPTION + config.description
     schema = nlp.Features(config.features)
     return nlp.DatasetInfo(
         description=full_description,
         features=schema,
         homepage=_HOMEPAGE,
         citation=_CITATION,
     )
Пример #15
0
    def _info(self):
        """Return the ``nlp.DatasetInfo`` for the fquad dataset.

        ``context`` is a string; ``questions`` is a sequence over a dict
        with one string field ``question``; ``answers`` is a sequence over
        a dict with string ``texts`` and ``int32`` ``answers_starts``.
        """
        question_record = {'question': nlp.Value('string')}
        answer_record = {
            'texts': nlp.Value('string'),
            'answers_starts': nlp.Value('int32'),
        }
        schema = nlp.Features({
            'context': nlp.Value('string'),
            'questions': nlp.features.Sequence(question_record),
            'answers': nlp.features.Sequence(answer_record),
        })
        return nlp.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            # No (input, target) pair is declared for this dataset.
            supervised_keys=None,
            homepage='https://fquad.illuin.tech/',
            citation=_CITATION,
        )
Пример #16
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the active configuration.

     The ``wiki_pages`` config uses three string fields; every other
     config uses the claim/evidence schema. The description is
     ``_DESCRIPTION``, a newline, then the config's own description.
     """
     if self.config.name == 'wiki_pages':
         columns = (
             ('id', 'string'),
             ('text', 'string'),
             ('lines', 'string'),
         )
     else:
         columns = (
             ('id', 'int32'),
             ('label', 'string'),
             ('claim', 'string'),
             ('evidence_annotation_id', 'int32'),
             ('evidence_id', 'int32'),
             ('evidence_wiki_url', 'string'),
             ('evidence_sentence_id', 'int32'),
         )
     features = {name: nlp.Value(dtype) for name, dtype in columns}
     return nlp.DatasetInfo(
         description=_DESCRIPTION + '\n' + self.config.description,
         features=nlp.Features(features),
         homepage="https://fever.ai/",
         citation=_CITATION,
     )
Пример #17
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the quarel dataset.

     ``logical_forms`` and ``world_literals`` are sequences over dicts of
     string fields; ``answer_index`` is ``int32``; the remaining fields
     are strings.
     """
     logical_form_record = {'logical_form': nlp.Value('string')}
     world_record = {
         'world1': nlp.Value('string'),
         'world2': nlp.Value('string'),
     }
     schema = nlp.Features({
         'id': nlp.Value('string'),
         'answer_index': nlp.Value('int32'),
         'logical_forms': nlp.features.Sequence(logical_form_record),
         'logical_form_pretty': nlp.Value('string'),
         'world_literals': nlp.features.Sequence(world_record),
         'question': nlp.Value('string'),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage='https://allenai.org/data/quarel',
         citation=_CITATION,
     )
Пример #18
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the wiki_qa dataset.

     ``question_id``, ``question``, ``document_title`` and ``answer`` are
     strings; ``label`` is a two-class ``ClassLabel`` declared last.
     """
     string_columns = ("question_id", "question", "document_title", "answer")
     feature_spec = {name: nlp.Value("string") for name in string_columns}
     feature_spec["label"] = nlp.features.ClassLabel(num_classes=2)
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://www.microsoft.com/en-us/download/details.aspx?id=52419",
         citation=_CITATION,
     )
Пример #19
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` (schema, homepage, citation).

     Every field is string-based: the names in ``list_valued`` are
     sequences of strings, all others are single string values. Field
     order follows ``ordered_names``.
     """
     ordered_names = (
         "text",
         "topics",
         "lewis_split",
         "cgis_split",
         "old_id",
         "new_id",
         "places",
         "people",
         "orgs",
         "exchanges",
         "date",
         "title",
     )
     list_valued = {"topics", "places", "people", "orgs", "exchanges"}
     feature_spec = {}
     for name in ordered_names:
         # A fresh Value per field, wrapped in a Sequence when list-valued.
         value = nlp.Value("string")
         feature_spec[name] = nlp.Sequence(value) if name in list_valued else value
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html",
         citation=_CITATION,
     )
Пример #20
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` (schema, homepage, citation).

     Five string fields come first, then ``answers`` (a sequence over a
     dict with string ``a_id``/``text`` and ``int32`` ``score``), then
     three ``*_urls`` fields, each a sequence over a dict with one string
     field ``url``.
     """
     string_columns = ("q_id", "title", "selftext", "document", "subreddit")
     feature_spec = {name: nlp.Value("string") for name in string_columns}
     feature_spec["answers"] = nlp.features.Sequence({
         "a_id": nlp.Value("string"),
         "text": nlp.Value("string"),
         "score": nlp.Value("int32"),
     })
     # Each URL list gets its own Sequence/Value objects.
     for url_column in ("title_urls", "selftext_urls", "answers_urls"):
         feature_spec[url_column] = nlp.features.Sequence(
             {"url": nlp.Value("string")})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         supervised_keys=None,
         homepage="https://facebookresearch.github.io/ELI5/explore.html",
         citation=_CITATION,
     )
Пример #21
0
    def _info(self):
        """Return the ``nlp.DatasetInfo`` for the active task configuration.

        Every name in ``self.config.features`` starts out as a string
        field; the branch for the active config name then adds or
        overrides the task-specific fields. An unrecognized config name
        raises ``SystemExit('Invalid task name')``.
        """
        features = {
            column: nlp.Value("string") for column in self.config.features
        }
        task = self.config.name
        if task == 'task1':
            features.update({
                "id": nlp.Value("int64"),
                "text": nlp.Value("string"),
                "url": nlp.Value("string"),
                "label": nlp.ClassLabel(names=["0", "1"]),
            })
        elif task == 'task2':
            features.update({
                "id": nlp.Value("int64"),
                "label": nlp.ClassLabel(names=["0", "1"]),
                "last": nlp.Value("bool"),
                "sent_num": nlp.Value("int64"),
                "sentence": nlp.Value("string"),
            })
        elif task in ('task3_document', 'task3_sentence'):
            entity_types = ('etime', 'fname', 'loc', 'organizer',
                            'participant', 'place', 'target', 'trigger')
            # All B- tags, then all I- tags, then the outside tag 'O'.
            tag_names = (['B-' + entity for entity in entity_types] +
                         ['I-' + entity for entity in entity_types] +
                         ['O'])
            features['token'] = nlp.Sequence(nlp.Value("string"))
            features['label'] = nlp.Sequence(nlp.ClassLabel(names=tag_names))
        else:
            # NOTE(review): SystemExit terminates the interpreter unless
            # caught explicitly — confirm this is preferred over ValueError.
            raise SystemExit('Invalid task name')

        return nlp.DatasetInfo(features=nlp.Features(features))
Пример #22
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the ai2_arc dataset.

     ``id``, ``question`` and ``answerKey`` are strings; ``choices`` is a
     sequence over a dict of ``text``/``label`` strings.
     """
     choice_record = {
         "text": nlp.Value("string"),
         "label": nlp.Value("string"),
     }
     schema = nlp.Features({
         "id": nlp.Value("string"),
         "question": nlp.Value("string"),
         "choices": nlp.features.Sequence(choice_record),
         "answerKey": nlp.Value("string"),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://allenai.org/data/arc",
         citation=_CITATION,
     )
Пример #23
0
def read_col_formatted_as_numpy(feats, tmp_dir):
    """Read every column of ``beta.arrow`` with the "numpy" format set.

    Args:
        feats: features handed to ``nlp.DatasetInfo(features=...)``; it is
            also iterated to obtain the column names to read.
        tmp_dir: directory that contains the ``beta.arrow`` file.

    Each column is fetched and discarded; the function returns ``None``.
    """
    arrow_path = os.path.join(tmp_dir, "beta.arrow")
    dataset_info = nlp.DatasetInfo(features=feats)
    dataset = nlp.Dataset.from_file(filename=arrow_path, info=dataset_info)
    dataset.set_format("numpy")
    for column_name in feats:
        _ = dataset[column_name]
Пример #24
0
    def _info(self):
        """Return the ``nlp.DatasetInfo`` for the active xtreme config.

        Each key of ``self.config.text_features`` becomes a string field.
        If an ``answers`` field is present it is replaced by a sequence
        over a dict with ``int32`` ``answer_start`` and string ``text``.
        Configs whose name starts with ``PAWS-X`` get a string ``label``;
        the ``XNLI`` config gets a string ``gold_label``.
        """
        config = self.config
        features = {
            field_name: nlp.Value("string")
            for field_name in six.iterkeys(config.text_features)
        }
        if "answers" in features:
            features["answers"] = nlp.features.Sequence({
                "answer_start": nlp.Value("int32"),
                "text": nlp.Value("string"),
            })
        if config.name.startswith("PAWS-X"):
            features["label"] = nlp.Value("string")
        if config.name == "XNLI":
            features["gold_label"] = nlp.Value("string")

        # Config-specific text comes first in the description and citation;
        # the homepage joins the project URL and the config URL with a tab.
        description = config.description + "\n" + _DESCRIPTION
        homepage = ("https://github.com/google-research/xtreme" + "\t" +
                    config.url)
        citation = config.citation + "\n" + _CITATION
        return nlp.DatasetInfo(
            description=description,
            features=nlp.Features(features),
            # No (input, target) pair is declared for this dataset.
            supervised_keys=None,
            homepage=homepage,
            citation=citation,
        )
    def _info(self):
        """Return the ``nlp.DatasetInfo`` for the lc_quad dataset.

        Every field is a scalar ``nlp.Value``; ``uid`` and
        ``template_index`` are ``int32`` and the rest are strings.
        """
        # NOTE(review): no "template_id" field is declared (it was left
        # commented out between "template" and "paraphrased_question") —
        # confirm this is intended.
        columns = (
            ("NNQT_question", 'string'),
            ("uid", 'int32'),
            ("subgraph", 'string'),
            ("template_index", 'int32'),
            ("question", 'string'),
            ("sparql_wikidata", 'string'),
            ("sparql_dbpedia18", 'string'),
            ("template", 'string'),
            ("paraphrased_question", 'string'),
        )
        schema = nlp.Features(
            {name: nlp.Value(dtype) for name, dtype in columns})
        return nlp.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            # No (input, target) pair is declared for this dataset.
            supervised_keys=None,
            homepage='http://lc-quad.sda.tech/',
            citation=_CITATION,
        )
Пример #26
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the empathetic_dialogues dataset.

     Every field is a scalar ``nlp.Value``; ``utterance_idx`` and
     ``speaker_idx`` are ``int32`` and the rest are strings.
     """
     columns = (
         ("conv_id", "string"),
         ("utterance_idx", "int32"),
         ("context", "string"),
         ("prompt", "string"),
         ("speaker_idx", "int32"),
         ("utterance", "string"),
         ("selfeval", "string"),
         ("tags", "string"),
     )
     schema = nlp.Features(
         {name: nlp.Value(dtype) for name, dtype in columns})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://github.com/facebookresearch/EmpatheticDialogues",
         citation=_CITATION,
     )
Пример #27
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the squad_it dataset.

     ``id``, ``context`` and ``question`` are strings; ``answers`` is a
     sequence over a dict with string ``text`` and ``int32``
     ``answer_start``.
     """
     feature_spec = {
         name: nlp.Value('string')
         for name in ("id", "context", "question")
     }
     feature_spec["answers"] = nlp.features.Sequence({
         "text": nlp.Value('string'),
         "answer_start": nlp.Value('int32'),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage='https://github.com/crux82/squad-it',
         citation=_CITATION,
     )
Пример #28
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the quoref dataset.

     ``id``, ``question``, ``context``, ``title`` and ``url`` are strings;
     ``answers`` is a sequence over a dict with ``int32`` ``answer_start``
     and string ``text``.
     """
     string_columns = ("id", "question", "context", "title", "url")
     feature_spec = {name: nlp.Value("string") for name in string_columns}
     feature_spec["answers"] = nlp.features.Sequence({
         "answer_start": nlp.Value("int32"),
         "text": nlp.Value("string"),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://leaderboard.allenai.org/quoref/submissions/get-started",
         citation=_CITATION,
     )
Пример #29
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the art dataset.

     The two observations and two hypotheses are strings; ``label`` is a
     three-class ``ClassLabel`` declared last.
     """
     string_columns = (
         "observation_1",
         "observation_2",
         "hypothesis_1",
         "hypothesis_2",
     )
     feature_spec = {name: nlp.Value("string") for name in string_columns}
     feature_spec["label"] = nlp.features.ClassLabel(num_classes=3)
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(feature_spec),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://leaderboard.allenai.org/anli/submissions/get-started",
         citation=_CITATION,
     )
Пример #30
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` for the coqa dataset.

     ``source`` and ``story`` are strings; ``questions`` is a sequence
     over a dict with one string field ``input_text``; ``answers`` is a
     sequence over a dict with the answer text plus ``int32`` start and
     end offsets.
     """
     question_record = {"input_text": nlp.Value("string")}
     answer_record = {
         "input_text": nlp.Value("string"),
         "answer_start": nlp.Value("int32"),
         "answer_end": nlp.Value("int32"),
     }
     # NOTE(review): no "additional_answers" feature is declared (a draft
     # for the validation set's extra answers was left commented out) —
     # confirm whether it should be added.
     schema = nlp.Features({
         "source": nlp.Value("string"),
         "story": nlp.Value("string"),
         "questions": nlp.features.Sequence(question_record),
         "answers": nlp.features.Sequence(answer_record),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage="https://stanfordnlp.github.io/coqa/",
         citation=_CITATION,
     )