def _info(self):
    """Build and return the ``datasets.DatasetInfo`` for this dataset.

    The metadata bundles the module-level description and citation, the
    British Library "digitised printed books" homepage, and a single
    language-modeling task template that reads the ``"text"`` column.

    Note: ``features`` is not assigned inside this method, so it is looked up
    from an enclosing scope when the method runs.
    """
    homepage_url = "https://www.bl.uk/collection-guides/digitised-printed-books"
    # One task template: language modeling over the "text" column.
    templates = [LanguageModeling(text_column="text")]
    info = datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=features,
        # No (input, target) pair is declared for this dataset.
        supervised_keys=None,
        homepage=homepage_url,
        citation=_CITATION,
        task_templates=templates,
    )
    return info
def _info(self):
    """Build and return the ``datasets.DatasetInfo`` for this dataset.

    The schema has eleven columns: string metadata fields, a ``pub_date``
    timestamp with second resolution, an ``article_type`` class label and an
    ``int32`` ``extent``. Description, homepage, license and citation come
    from module-level constants, and one language-modeling task template is
    registered on the ``"text"`` column.
    """
    # Label names for the "article_type" column, kept in their original order.
    article_type_names = [
        "ADVERTISEMENT_SECTION",
        "BIBLIOGRAPHY",
        "CHAPTER",
        "INDEX",
        "CONTRIBUTION",
        "TABLE_OF_CONTENTS",
        "WEATHER",
        "SHIPPING",
        "SECTION",
        "ARTICLE",
        "TITLE_SECTION",
        "DEATH_NOTICE",
        "SUPPLEMENT",
        "TABLE",
        "ADVERTISEMENT",
        "CHART_DIAGRAM",
        "ILLUSTRATION",
        "ISSUE",
    ]

    # Column name -> feature type; insertion order is the column order.
    columns = {
        "id": datasets.Value("string"),
        "source": datasets.Value("string"),
        "url": datasets.Value("string"),
        "title": datasets.Value("string"),
        "ispartof": datasets.Value("string"),
        "text": datasets.Value("string"),
        "pub_date": datasets.Value("timestamp[s]"),
        "publisher": datasets.Value("string"),
        "language": datasets.Value("string"),
        "article_type": datasets.ClassLabel(names=article_type_names),
        "extent": datasets.Value("int32"),
    }
    features = datasets.Features(columns)

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=features,
        # No (input, target) pair is declared for this dataset.
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        # One task template: language modeling over the "text" column.
        task_templates=[LanguageModeling(text_column="text")],
    )
def _info(self):
    """Build and return the ``datasets.DatasetInfo`` for this dataset.

    Declares the column schema (string metadata fields, a second-resolution
    ``pub_date`` timestamp, an ``article_type`` class label and an ``int32``
    ``extent``) and attaches the module-level description, homepage, license
    and citation, plus a language-modeling task template on ``"text"``.
    """

    def string_column():
        # A fresh feature object per column, as in a fully written-out schema.
        return datasets.Value("string")

    # Label names for "article_type", kept in their original order.
    article_type = datasets.ClassLabel(
        names=[
            "ADVERTISEMENT_SECTION",
            "BIBLIOGRAPHY",
            "CHAPTER",
            "INDEX",
            "CONTRIBUTION",
            "TABLE_OF_CONTENTS",
            "WEATHER",
            "SHIPPING",
            "SECTION",
            "ARTICLE",
            "TITLE_SECTION",
            "DEATH_NOTICE",
            "SUPPLEMENT",
            "TABLE",
            "ADVERTISEMENT",
            "CHART_DIAGRAM",
            "ILLUSTRATION",
            "ISSUE",
        ]
    )

    # Column name -> feature type; insertion order is the column order.
    schema = {
        "id": string_column(),
        "source": string_column(),
        "url": string_column(),
        "title": string_column(),
        "ispartof": string_column(),
        "text": string_column(),
        "pub_date": datasets.Value("timestamp[s]"),
        "publisher": string_column(),
        "language": string_column(),
        "article_type": article_type,
        "extent": datasets.Value("int32"),
    }

    info = datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(schema),
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[LanguageModeling(text_column="text")],
    )
    return info
def test_from_dict(self):
    """``LanguageModeling.from_dict`` yields the expected task name and input schema."""
    expected_schema = Features({"text": Value("string")})

    # Build the template from its dict form, with a custom text column name.
    template = LanguageModeling.from_dict({"text_column": "input_text"})

    assert template.task == "language-modeling"
    assert template.input_schema == expected_schema
def test_column_mapping(self):
    """``column_mapping`` maps the configured text column name to ``"text"``."""
    expected_mapping = {"input_text": "text"}

    template = LanguageModeling(text_column="input_text")

    assert template.column_mapping == expected_mapping