Пример #1
0
    def _info(self):
        """Describe the dataset schema and metadata.

        Returns:
            A ``datasets.DatasetInfo`` with a string ``text`` column, a
            ten-class ``label`` column of intent names, the homepage, the
            citation and one text-classification task template.
        """
        # TODO: Consider adding an alternate configuration for the entity
        # slots. The default is to only return the intent labels.
        intent_names = [
            "ComparePlaces",
            "RequestRide",
            "GetWeather",
            "SearchPlace",
            "GetPlaceDetails",
            "ShareCurrentLocation",
            "GetTrafficInformation",
            "BookRestaurant",
            "GetDirections",
            "ShareETA",
        ]
        schema = datasets.Features({
            "text": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=intent_names),
        })
        classification = TextClassification(text_column="text", label_column="label")
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            homepage="https://github.com/sonos/nlu-benchmark/tree/master/2016-12-built-in-intents",
            citation=_CITATION,
            task_templates=[classification],
        )
 def test_column_mapping(self):
     """Check that the template maps dataset columns to its canonical names."""
     expected_mapping = {"input_text": "text", "input_label": "labels"}
     template = TextClassification(text_column="input_text", label_column="input_label")
     self.assertDictEqual(expected_mapping, template.column_mapping)
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         fourteen-class ``label`` column, no supervised keys, and one
         text-classification task template.
     """
     label_names = [
         "saree",
         "kamel",
         "mutakareb",
         "mutadarak",
         "munsareh",
         "madeed",
         "mujtath",
         "ramal",
         "baseet",
         "khafeef",
         "taweel",
         "wafer",
         "hazaj",
         "rajaz",
     ]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=label_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/zaidalyafeai/MetRec",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #4
0
    def _info(self):
        """Describe the dataset schema and metadata.

        Returns:
            A ``datasets.DatasetInfo`` with a seven-class ``label`` column
            followed by a string ``text`` column, no supervised keys, and one
            text-classification task template.
        """
        utterance_kinds = [
            "fragment",
            "statement",
            "question",
            "command",
            "rhetorical question",
            "rhetorical command",
            "intonation-dependent utterance",
        ]
        # Column order (label first, then text) is part of the schema.
        schema = datasets.Features({
            "label": datasets.features.ClassLabel(names=utterance_kinds),
            "text": datasets.Value("string"),
        })
        classification = TextClassification(text_column="text", label_column="label")
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
            task_templates=[classification],
        )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``sentence`` column, a
         six-class ``target`` column, no supervised keys, and one
         text-classification task template over those two columns.
     """
     target_names = [
         "geogName",
         "noEntity",
         "orgName",
         "persName",
         "placeName",
         "time",
     ]
     schema = datasets.Features({
         "sentence": datasets.Value("string"),
         "target": datasets.ClassLabel(names=target_names),
     })
     classification = TextClassification(text_column="sentence", label_column="target")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #6
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``news`` column, a
         nine-class ``label`` column, the homepage, and one
         text-classification task template.
     """
     category_names = [
         "урлаг соёл",
         "эдийн засаг",
         "эрүүл мэнд",
         "хууль",
         "улс төр",
         "спорт",
         "технологи",
         "боловсрол",
         "байгал орчин",
     ]
     schema = datasets.Features({
         "news": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=category_names),
     })
     classification = TextClassification(text_column="news", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="http://eduge.mn",
         task_templates=[classification],
     )
Пример #7
0
 def test_value_error_unique_labels(self):
     """Check that building a template with duplicate labels raises ``ValueError``."""
     # Add duplicate labels
     duplicated_labels = self.labels + self.labels[:1]
     # Only the constructor call sits inside the context manager: any
     # statement placed after a raising call would never execute, and a
     # narrow body guarantees the ValueError comes from the constructor.
     with self.assertRaises(ValueError):
         TextClassification(text_column="input_text",
                            label_column="input_label",
                            labels=duplicated_labels)
 def test_align_with_features(self):
     """Check that aligning with features fills in the label schema.

     Before alignment the ``labels`` entry of the schema is the bare
     ``ClassLabel`` class; afterwards it equals a ``ClassLabel`` built from
     ``self.labels``.
     """
     template = TextClassification(text_column="input_text", label_column="input_label")
     self.assertEqual(template.label_schema["labels"], ClassLabel)

     dataset_features = Features({"input_label": ClassLabel(names=self.labels)})
     aligned = template.align_with_features(dataset_features)
     self.assertEqual(aligned.label_schema["labels"], ClassLabel(names=self.labels))
Пример #9
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a binary
         ``label`` column (``neg``/``pos``), no supervised keys, and one
         text-classification task template.
     """
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=["neg", "pos"]),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="http://ai.stanford.edu/~amaas/data/sentiment/",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #10
0
    def test_remove_and_map_on_task_template(self):
        """Check that task templates are dropped once the label column is removed.

        After removing ``label`` and applying an identity ``map``, the
        resulting dataset's ``info.task_templates`` must be an empty list.
        """
        schema = Features({"text": Value("string"), "label": ClassLabel(names=("pos", "neg"))})
        # The template is passed as a bare object rather than a list.
        template = TextClassification(text_column="text", label_column="label")
        dataset_info = DatasetInfo(features=schema, task_templates=template)
        rows = {"text": ["A sentence."], "label": ["pos"]}
        dataset = Dataset.from_dict(rows, info=dataset_info)

        def process(example):
            return example

        without_label = dataset.remove_columns("label")
        remapped = without_label.map(process)
        assert remapped.info.task_templates == []
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         six-class ``label`` column, ``("text", "label")`` as supervised
         keys, and one text-classification task template.
     """
     emotion_labels = datasets.ClassLabel(
         names=["sadness", "joy", "love", "anger", "fear", "surprise"]
     )
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": emotion_labels,
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("text", "label"),
         homepage=_URL,
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``sentence`` column, a
         binary ``sentiment`` column (``negative``/``positive``), and one
         text-classification task template over those two columns.
     """
     schema = datasets.Features({
         "sentence": datasets.Value("string"),
         "sentiment": datasets.ClassLabel(names=["negative", "positive"]),
     })
     classification = TextClassification(text_column="sentence", label_column="sentiment")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         citation=_CITATION,
         homepage=_HOMEPAGE,
         task_templates=[classification],
     )
Пример #13
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``texts`` column, a
         four-class ``category`` column, no supervised keys, and one
         text-classification task template over those two columns.
     """
     category_names = ["pos", "neu", "neg", "q"]
     schema = datasets.Features({
         "texts": datasets.Value("string"),
         "category": datasets.features.ClassLabel(names=category_names),
     })
     classification = TextClassification(text_column="texts", label_column="category")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/PyThaiNLP/wisesight-sentiment",
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with string ``review_body`` and
         ``review_summary`` columns, a ``star_rating`` class column named
         ``"1"`` through ``"5"``, no supervised keys, and two
         text-classification task templates (one per text column).
     """
     star_names = [str(stars) for stars in range(1, 6)]
     # One template per text column, both predicting the star rating.
     body_task = TextClassification(text_column="review_body", label_column="star_rating")
     summary_task = TextClassification(text_column="review_summary", label_column="star_rating")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features({
             "review_body": datasets.Value("string"),
             "review_summary": datasets.Value("string"),
             "star_rating": datasets.ClassLabel(names=star_names),
         }),
         supervised_keys=None,
         homepage="http://www.lsi.us.es/~fermin/index.php/Datasets",
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[body_task, summary_task],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         two-class ``label`` column named ``"1"`` and ``"2"``, no supervised
         keys, and one text-classification task template.
     """
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=["1", "2"]),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://course.fast.ai/datasets",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #16
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a five-class ``language_id`` column
         followed by a string ``text`` column, no supervised keys, and one
         text-classification task template predicting ``language_id``.
     """
     language_codes = ["AWA", "BRA", "MAG", "BHO", "HIN"]
     # Column order (language_id first, then text) is part of the schema.
     schema = datasets.Features({
         "language_id": datasets.ClassLabel(names=language_codes),
         "text": datasets.Value("string"),
     })
     classification = TextClassification(text_column="text", label_column="language_id")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/kmi-linguistics/vardial2018",
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         five-class ``label`` column, the homepage, the citation and one
         text-classification task template.
     """
     sentiment_names = ["Positive", "Negative", "Mixed_feelings", "unknown_state", "not-Tamil"]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=sentiment_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="https://dravidian-codemix.github.io/2020/datasets.html",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #18
0
    def _info(self):
        """Describe the dataset schema and metadata.

        Returns:
            A ``datasets.DatasetInfo`` with a string ``sentence`` column, a
            ``label`` class column whose names come from ``_CLASSES``, no
            supervised keys, and one text-classification task template.
        """
        # Columns of the dataset and their types.
        schema = datasets.Features({
            "sentence": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=_CLASSES),
        })
        classification = TextClassification(text_column="sentence", label_column="label")
        return datasets.DatasetInfo(
            # Description shown on the datasets page.
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
            task_templates=[classification],
        )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a binary
         ``label`` column (``neg``/``pos``), no supervised keys, and one
         text-classification task template.
     """
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features({
             "text": datasets.Value("string"),
             "label": datasets.features.ClassLabel(names=["neg", "pos"]),
         }),
         # No supervised (input, target) pair is declared: an empty-string
         # key would not correspond to any column in the features above.
         supervised_keys=None,
         homepage="http://www.cs.cornell.edu/people/pabo/movie-review-data/",
         citation=_CITATION,
         task_templates=[
             TextClassification(text_column="text", label_column="label")
         ],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a binary ``label`` column
         (``no-hate-speech``/``hate-speech``) followed by a string ``tweet``
         column, and one text-classification task template.
     """
     # Column order (label first, then tweet) is part of the schema.
     schema = datasets.Features({
         "label": datasets.ClassLabel(names=["no-hate-speech", "hate-speech"]),
         "tweet": datasets.Value("string"),
     })
     classification = TextClassification(text_column="tweet", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="https://github.com/sharmaroshan/Twitter-Sentiment-Analysis",
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a binary
         ``label`` column (``negative``/``positive``), no supervised keys,
         and one text-classification task template.
     """
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.ClassLabel(names=["negative", "positive"]),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/timpal0l/swedish-sentiment",
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``review`` column, a
         binary ``label`` column (``neg``/``pos``), no supervised keys, and
         one text-classification task template.
     """
     schema = datasets.Features({
         "review": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=["neg", "pos"]),
     })
     classification = TextClassification(text_column="review", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/TheophileBlard/french-sentiment-analysis-with-bert",
         citation=_CITATION,
         task_templates=[classification],
     )
    def _info(self):
        """Describe the dataset schema and metadata.

        Returns:
            A ``datasets.DatasetInfo`` with a string ``tokens`` column, a
            binary ``label`` column (``no_sarcasm``/``sarcasm``), no
            supervised keys, and one text-classification task template.
        """
        schema = datasets.Features({
            "tokens": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=["no_sarcasm", "sarcasm"]),
        })
        classification = TextClassification(text_column="tokens", label_column="label")
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            task_templates=[classification],
        )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``sms`` column, a binary
         ``label`` column (``ham``/``spam``), ``("sms", "label")`` as
         supervised keys, and one text-classification task template.
     """
     schema = datasets.Features({
         "sms": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=["ham", "spam"]),
     })
     classification = TextClassification(text_column="sms", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("sms", "label"),
         homepage="http://archive.ics.uci.edu/ml/datasets/SMS+Spam+Collection",
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         four-class ``label`` column of news topics, the homepage, the
         citation and one text-classification task template.
     """
     topic_names = ["World", "Sports", "Business", "Sci/Tech"]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=topic_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="http://groups.di.unipi.it/~gulli/AG_corpus_of_news_articles.html",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #26
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         four-class ``label`` column, no supervised keys, and one
         text-classification task template.
     """
     emotion_names = ["others", "happy", "sad", "angry"]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=emotion_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://www.aclweb.org/anthology/S19-2005/",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #27
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``sentence`` column, a
         nineteen-class ``relation`` column, ``sentence`` -> ``relation`` as
         supervised keys, and one text-classification task template.
     """
     relation_names = [
         "Cause-Effect(e1,e2)",
         "Cause-Effect(e2,e1)",
         "Component-Whole(e1,e2)",
         "Component-Whole(e2,e1)",
         "Content-Container(e1,e2)",
         "Content-Container(e2,e1)",
         "Entity-Destination(e1,e2)",
         "Entity-Destination(e2,e1)",
         "Entity-Origin(e1,e2)",
         "Entity-Origin(e2,e1)",
         "Instrument-Agency(e1,e2)",
         "Instrument-Agency(e2,e1)",
         "Member-Collection(e1,e2)",
         "Member-Collection(e2,e1)",
         "Message-Topic(e1,e2)",
         "Message-Topic(e2,e1)",
         "Product-Producer(e1,e2)",
         "Product-Producer(e2,e1)",
         "Other",
     ]
     # Columns of the dataset and their types.
     schema = datasets.Features({
         "sentence": datasets.Value("string"),
         "relation": datasets.ClassLabel(names=relation_names),
     })
     # The common (input, target) pair taken from the features.
     input_output = datasets.info.SupervisedKeysData(input="sentence", output="relation")
     classification = TextClassification(text_column="sentence", label_column="relation")
     return datasets.DatasetInfo(
         # Description shown on the datasets page.
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=input_output,
         # Homepage of the dataset, for documentation.
         homepage="https://semeval2.fbk.eu/semeval2.php?location=tasks&taskid=11",
         citation=_CITATION,
         task_templates=[classification],
     )
Пример #28
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``tweet`` column, an
         eight-class ``label`` column, the homepage, the citation and one
         text-classification task template.
     """
     emotion_names = [
         "none",
         "anger",
         "joy",
         "sadness",
         "love",
         "sympathy",
         "surprise",
         "fear",
     ]
     schema = datasets.Features({
         "tweet": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=emotion_names),
     })
     classification = TextClassification(text_column="tweet", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage=_HOMEPAGE,
         citation=_CITATION,
         task_templates=[classification],
     )
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         three-class ``label`` column (``ele``/``int``/``adv``), no
         supervised keys, and one text-classification task template.
     """
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features({
             "text": datasets.Value("string"),
             "label": datasets.features.ClassLabel(names=["ele", "int", "adv"]),
         }),
         # No supervised (input, target) pair is declared: an empty-string
         # key would not correspond to any column in the features above.
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[
             TextClassification(text_column="text", label_column="label")
         ],
     )
Пример #30
0
 def _info(self):
     """Describe the dataset schema and metadata.

     Returns:
         A ``datasets.DatasetInfo`` with a string ``text`` column, a
         two-class ``label`` column named ``"0"`` and ``"1"``, no supervised
         keys, and one text-classification task template.
     """
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features({
             "text": datasets.Value("string"),
             # Labels: 0="Non-hate Speech", 1="Hate Speech"
             "label": datasets.features.ClassLabel(names=["0", "1"]),
         }),
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )