def _info(self):
    """Build the nlp.DatasetInfo for the Blended Skill Talk dataset."""
    feature_spec = {
        "personas": nlp.features.Sequence(nlp.Value("string")),
        "additional_context": nlp.Value("string"),
        "previous_utterance": nlp.features.Sequence(nlp.Value("string")),
        "context": nlp.Value("string"),
        "free_messages": nlp.features.Sequence(nlp.Value("string")),
        "guided_messages": nlp.features.Sequence(nlp.Value("string")),
        # One string suggestion per task listed in _TASK.
        "suggestions": nlp.features.Sequence(
            {task: nlp.Value("string") for task in _TASK}
        ),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://parl.ai/projects/bst/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the RACE reading-comprehension dataset."""
    feature_spec = {
        "article": nlp.Value("string"),
        "answer": nlp.Value("string"),
        "question": nlp.Value("string"),
        # Candidate answers, one dict entry per option.
        "options": nlp.features.Sequence({"option": nlp.Value("string")}),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="http://www.cs.cmu.edu/~glai1/data/race/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the WIQA dataset."""
    feature_spec = {
        "question_stem": nlp.Value("string"),
        "question_para_step": nlp.features.Sequence(nlp.Value("string")),
        "answer_label": nlp.Value("string"),
        "answer_label_as_choice": nlp.Value("string"),
        "choices": nlp.features.Sequence(
            {"text": nlp.Value("string"), "label": nlp.Value("string")}
        ),
        # Per-example provenance metadata.
        "metadata_question_id": nlp.Value("string"),
        "metadata_graph_id": nlp.Value("string"),
        "metadata_para_id": nlp.Value("string"),
        "metadata_question_type": nlp.Value("string"),
        "metadata_path_len": nlp.Value("int32"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://allenai.org/data/wiqa",
        citation=_CITATION,
    )
def read_batch_unformated(feats, tmp_dir):
    """Benchmark helper: slice through the whole arrow file in fixed-size
    batches with no output formatting applied."""
    chunk = 10
    path = os.path.join(tmp_dir, "beta.arrow")
    dataset = nlp.Dataset.from_file(
        filename=path, info=nlp.DatasetInfo(features=feats)
    )
    for start in range(0, len(dataset), chunk):
        _ = dataset[start : start + chunk]
def _info(self):
    """Build the nlp.DatasetInfo for the CoQA dataset."""
    feature_spec = {
        "source": nlp.Value("string"),
        "story": nlp.Value("string"),
        "questions": nlp.features.Sequence(nlp.Value("string")),
        # Each answer carries its text plus character offsets into the story.
        "answers": nlp.features.Sequence(
            {
                "input_text": nlp.Value("string"),
                "answer_start": nlp.Value("int32"),
                "answer_end": nlp.Value("int32"),
            }
        ),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://stanfordnlp.github.io/coqa/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the x-stance dataset."""
    feature_spec = {
        "question": nlp.Value("string"),
        "id": nlp.Value("int32"),
        "question_id": nlp.Value("int32"),
        "language": nlp.Value("string"),
        "comment": nlp.Value("string"),
        "label": nlp.Value("string"),
        "numerical_label": nlp.Value("int32"),
        "author": nlp.Value("string"),
        "topic": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://github.com/ZurichNLP/xstance",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the MathQA dataset."""
    # All fields are plain strings; "Problem"/"Rationale" keep the source
    # data's original capitalization.
    feature_spec = {
        "Problem": nlp.Value("string"),
        "Rationale": nlp.Value("string"),
        "options": nlp.Value("string"),
        "correct": nlp.Value("string"),
        "annotated_formula": nlp.Value("string"),
        "linear_formula": nlp.Value("string"),
        "category": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://math-qa.github.io/math-QA/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the Cosmos QA dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "context": nlp.Value("string"),
        "question": nlp.Value("string"),
        # Four fixed answer candidates; label indexes the correct one.
        "answer0": nlp.Value("string"),
        "answer1": nlp.Value("string"),
        "answer2": nlp.Value("string"),
        "answer3": nlp.Value("string"),
        "label": nlp.Value("int32"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://wilburone.github.io/cosmos/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the OpenBookQA dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "question_stem": nlp.Value("string"),
        "choices": nlp.features.Sequence(
            {"text": nlp.Value("string"), "label": nlp.Value("string")}
        ),
        "answerKey": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://allenai.org/data/open-book-qa",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the SearchQA dataset.

    Bug fix: the original features dict literal listed "category" twice;
    duplicate keys in a dict literal are silently collapsed (the last one
    wins), so the duplicate was dead code and is removed here. The
    resulting feature set is unchanged.
    """
    return nlp.DatasetInfo(
        description=_DESCRIPTION + '\n' + self.config.description,
        features=nlp.Features(
            {
                "category": nlp.Value("string"),
                "air_date": nlp.Value("string"),
                "question": nlp.Value("string"),
                "value": nlp.Value("string"),
                "answer": nlp.Value("string"),
                "round": nlp.Value("string"),
                "show_number": nlp.Value("int32"),
                # Web search results attached to each question.
                "search_results": nlp.features.Sequence(
                    {
                        "urls": nlp.Value("string"),
                        "snippets": nlp.Value("string"),
                        "titles": nlp.Value("string"),
                        "related_links": nlp.Value("string"),
                    }
                ),
            }
        ),
        homepage="https://github.com/nyu-dl/dl4ir-searchQA",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the SOQAL Arabic QA dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "title": nlp.Value("string"),
        "context": nlp.Value("string"),
        "question": nlp.Value("string"),
        "answers": nlp.features.Sequence(
            {"text": nlp.Value("string"), "answer_start": nlp.Value("int32")}
        ),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        # No supervised keys: the model needs both question and context.
        supervised_keys=None,
        homepage="https://github.com/husseinmozannar/SOQAL/tree/master/data",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the ReClor dataset."""
    feature_spec = {
        "context": nlp.Value("string"),
        "question": nlp.Value("string"),
        "answers": nlp.features.Sequence(nlp.Value("string")),
        "label": nlp.Value("string"),
        "id_string": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="http://whyu.me/reclor/",
        citation=_CITATION,
    )
def _info(self):
    """Build a minimal nlp.DatasetInfo: a single nested string feature."""
    feature_spec = {"a": nlp.Sequence({"b": nlp.Value("string")})}
    return nlp.DatasetInfo(
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no default (input, target) pair
    )
def _info(self):
    """Build the nlp.DatasetInfo from the selected builder config."""
    # The per-config description is appended to the shared one.
    full_description = _DESCRIPTION + self.config.description
    return nlp.DatasetInfo(
        description=full_description,
        features=nlp.Features(self.config.features),
        homepage=_HOMEPAGE,
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the FQuAD French QA dataset."""
    feature_spec = {
        "context": nlp.Value("string"),
        "questions": nlp.features.Sequence({"question": nlp.Value("string")}),
        "answers": nlp.features.Sequence(
            {
                "texts": nlp.Value("string"),
                "answers_starts": nlp.Value("int32"),
            }
        ),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://fquad.illuin.tech/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for FEVER; the schema depends on the config."""
    if self.config.name == "wiki_pages":
        # Raw wiki dump: everything is a string blob.
        features = {
            "id": nlp.Value("string"),
            "text": nlp.Value("string"),
            "lines": nlp.Value("string"),
        }
    else:
        # Claim/evidence configs: numeric ids plus string fields.
        features = {
            "id": nlp.Value("int32"),
            "label": nlp.Value("string"),
            "claim": nlp.Value("string"),
            "evidence_annotation_id": nlp.Value("int32"),
            "evidence_id": nlp.Value("int32"),
            "evidence_wiki_url": nlp.Value("string"),
            "evidence_sentence_id": nlp.Value("int32"),
        }
    return nlp.DatasetInfo(
        description=_DESCRIPTION + '\n' + self.config.description,
        features=nlp.Features(features),
        homepage="https://fever.ai/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the QuaRel dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "answer_index": nlp.Value("int32"),
        "logical_forms": nlp.features.Sequence(
            {"logical_form": nlp.Value("string")}
        ),
        "logical_form_pretty": nlp.Value("string"),
        # The two "worlds" being compared in each question.
        "world_literals": nlp.features.Sequence(
            {"world1": nlp.Value("string"), "world2": nlp.Value("string")}
        ),
        "question": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://allenai.org/data/quarel",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the WikiQA dataset."""
    feature_spec = {
        "question_id": nlp.Value("string"),
        "question": nlp.Value("string"),
        "document_title": nlp.Value("string"),
        "answer": nlp.Value("string"),
        # Binary relevance label for the candidate answer sentence.
        "label": nlp.features.ClassLabel(num_classes=2),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://www.microsoft.com/en-us/download/details.aspx?id=52419",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the Reuters-21578 dataset."""
    string = nlp.Value("string")
    string_list = lambda: nlp.Sequence(nlp.Value("string"))
    feature_spec = {
        "text": string,
        "topics": string_list(),
        "lewis_split": string,
        "cgis_split": string,
        "old_id": string,
        "new_id": string,
        "places": string_list(),
        "people": string_list(),
        "orgs": string_list(),
        "exchanges": string_list(),
        "date": string,
        "title": string,
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the ELI5 dataset."""
    # Extracted URL lists share the same one-field shape.
    url_list = lambda: nlp.features.Sequence({"url": nlp.Value("string")})
    feature_spec = {
        "q_id": nlp.Value("string"),
        "title": nlp.Value("string"),
        "selftext": nlp.Value("string"),
        "document": nlp.Value("string"),
        "subreddit": nlp.Value("string"),
        "answers": nlp.features.Sequence(
            {
                "a_id": nlp.Value("string"),
                "text": nlp.Value("string"),
                "score": nlp.Value("int32"),
            }
        ),
        "title_urls": url_list(),
        "selftext_urls": url_list(),
        "answers_urls": url_list(),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,
        homepage="https://facebookresearch.github.io/ELI5/explore.html",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the configured task.

    The feature schema depends on ``self.config.name`` (task1, task2, or
    one of the task3 variants). An unrecognized name raises ``ValueError``.

    Bug fix: the original raised ``SystemExit`` here, which terminates the
    whole interpreter instead of signalling a recoverable configuration
    error that callers can catch.
    """
    features = {feature: nlp.Value("string") for feature in self.config.features}
    if self.config.name == 'task1':
        features["id"] = nlp.Value("int64")
        features["text"] = nlp.Value("string")
        features["url"] = nlp.Value("string")
        features["label"] = nlp.ClassLabel(names=["0", "1"])
    elif self.config.name == 'task2':
        features["id"] = nlp.Value("int64")
        features["label"] = nlp.ClassLabel(names=["0", "1"])
        features["last"] = nlp.Value("bool")
        features["sent_num"] = nlp.Value("int64")
        features["sentence"] = nlp.Value("string")
    elif self.config.name in ['task3_document', 'task3_sentence']:
        # Token-level BIO tagging over a fixed event-extraction label set.
        features['token'] = nlp.Sequence(nlp.Value("string"))
        features['label'] = nlp.Sequence(
            nlp.ClassLabel(names=[
                'B-etime', 'B-fname', 'B-loc', 'B-organizer',
                'B-participant', 'B-place', 'B-target', 'B-trigger',
                'I-etime', 'I-fname', 'I-loc', 'I-organizer',
                'I-participant', 'I-place', 'I-target', 'I-trigger',
                'O',
            ]))
    else:
        # ValueError (not SystemExit) so a bad config name is catchable.
        raise ValueError('Invalid task name')
    return nlp.DatasetInfo(features=nlp.Features(features))
def _info(self):
    """Build the nlp.DatasetInfo for the AI2 ARC dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "question": nlp.Value("string"),
        "choices": nlp.features.Sequence(
            {"text": nlp.Value("string"), "label": nlp.Value("string")}
        ),
        "answerKey": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://allenai.org/data/arc",
        citation=_CITATION,
    )
def read_col_formatted_as_numpy(feats, tmp_dir):
    """Benchmark helper: read every column of the arrow file with the
    dataset's output format set to numpy."""
    path = os.path.join(tmp_dir, "beta.arrow")
    dataset = nlp.Dataset.from_file(
        filename=path, info=nlp.DatasetInfo(features=feats)
    )
    dataset.set_format("numpy")
    for column in feats:
        _ = dataset[column]
def _info(self):
    """Build the nlp.DatasetInfo for the selected XTREME sub-task.

    The base schema is one string feature per configured text column;
    QA configs get a structured "answers" feature, PAWS-X configs a
    "label", and XNLI a "gold_label".

    Idiom cleanup: iterate the mapping directly instead of the py2-era
    ``six.iterkeys()`` indirection, and test membership with ``in features``
    rather than ``in features.keys()`` — both are behaviorally identical.
    """
    features = {
        text_feature: nlp.Value("string")
        for text_feature in self.config.text_features
    }
    if "answers" in features:
        features["answers"] = nlp.features.Sequence(
            {"answer_start": nlp.Value("int32"), "text": nlp.Value("string")}
        )
    if self.config.name.startswith("PAWS-X"):
        features["label"] = nlp.Value("string")
    if self.config.name == "XNLI":
        features["gold_label"] = nlp.Value("string")
    return nlp.DatasetInfo(
        description=self.config.description + "\n" + _DESCRIPTION,
        features=nlp.Features(features),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://github.com/google-research/xtreme" + "\t" + self.config.url,
        citation=self.config.citation + "\n" + _CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the LC-QuAD 2.0 dataset."""
    feature_spec = {
        "NNQT_question": nlp.Value("string"),
        "uid": nlp.Value("int32"),
        "subgraph": nlp.Value("string"),
        "template_index": nlp.Value("int32"),
        "question": nlp.Value("string"),
        "sparql_wikidata": nlp.Value("string"),
        "sparql_dbpedia18": nlp.Value("string"),
        "template": nlp.Value("string"),
        # NOTE: "template_id" is intentionally omitted from the schema.
        "paraphrased_question": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="http://lc-quad.sda.tech/",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the EmpatheticDialogues dataset."""
    feature_spec = {
        "conv_id": nlp.Value("string"),
        "utterance_idx": nlp.Value("int32"),
        "context": nlp.Value("string"),
        "prompt": nlp.Value("string"),
        "speaker_idx": nlp.Value("int32"),
        "utterance": nlp.Value("string"),
        "selfeval": nlp.Value("string"),
        "tags": nlp.Value("string"),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://github.com/facebookresearch/EmpatheticDialogues",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the SQuAD-it dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "context": nlp.Value("string"),
        "question": nlp.Value("string"),
        "answers": nlp.features.Sequence(
            {"text": nlp.Value("string"), "answer_start": nlp.Value("int32")}
        ),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://github.com/crux82/squad-it",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the Quoref dataset."""
    feature_spec = {
        "id": nlp.Value("string"),
        "question": nlp.Value("string"),
        "context": nlp.Value("string"),
        "title": nlp.Value("string"),
        "url": nlp.Value("string"),
        "answers": nlp.features.Sequence(
            {"answer_start": nlp.Value("int32"), "text": nlp.Value("string")}
        ),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://leaderboard.allenai.org/quoref/submissions/get-started",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for the ART (abductive reasoning) dataset."""
    feature_spec = {
        "observation_1": nlp.Value("string"),
        "observation_2": nlp.Value("string"),
        "hypothesis_1": nlp.Value("string"),
        "hypothesis_2": nlp.Value("string"),
        # Three-way label over the hypothesis pair.
        "label": nlp.features.ClassLabel(num_classes=3),
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://leaderboard.allenai.org/anli/submissions/get-started",
        citation=_CITATION,
    )
def _info(self):
    """Build the nlp.DatasetInfo for CoQA (structured-question variant)."""
    feature_spec = {
        "source": nlp.Value("string"),
        "story": nlp.Value("string"),
        "questions": nlp.features.Sequence({"input_text": nlp.Value("string")}),
        "answers": nlp.features.Sequence(
            {
                "input_text": nlp.Value("string"),
                "answer_start": nlp.Value("int32"),
                "answer_end": nlp.Value("int32"),
            }
        ),
        # NOTE: an 'additional_answers' Sequence (extra validation-set
        # answers) was sketched in the original but left disabled.
    }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage="https://stanfordnlp.github.io/coqa/",
        citation=_CITATION,
    )