Пример #1
0
 def load_builder_class(self, dataset_name, is_local=False):
     """Return the builder class defined by the script of ``dataset_name``.

     Args:
         dataset_name: Name of the dataset. It is passed unchanged to
             ``prepare_module`` for the non-local lookup, and appended to
             ``"./datasets/"`` for the local one.
         is_local: Only the exact value ``True`` selects the local lookup;
             any other value takes the non-local branch.

     Returns:
         Whatever ``import_main_class`` returns for the prepared module path.
     """
     if is_local is not True:
         # Non-local lookup: request a forced download of the dataset script.
         forced_download = DownloadConfig(force_download=True)
         prepared = prepare_module(dataset_name, download_config=forced_download)
     else:
         # Local lookup: resolve the script relative to ./datasets/.
         prepared = prepare_module("./datasets/" + dataset_name)
     # Only the first element (the module path) is needed here.
     script_module_path, _ = prepared
     return import_main_class(script_module_path)
Пример #2
0
 def test_load_real_dataset(self, dataset_name):
     """Load one configuration of a local dataset script and check no split is empty.

     The script is looked up under ``"./datasets/" + dataset_name`` with
     ``local_files_only=True``. The first entry of ``BUILDER_CONFIGS`` is used
     when the builder class declares any; otherwise ``name`` is ``None``.
     The dataset is generated into a temporary cache directory with
     ``GenerateMode.FORCE_REDOWNLOAD``.

     Args:
         dataset_name: Name of the dataset directory under ``./datasets/``.
     """
     path = "./datasets/" + dataset_name
     # The second element of the returned pair is not needed by this test.
     module_path, _ = prepare_module(path, download_config=DownloadConfig(local_files_only=True), dataset=True)
     builder_cls = import_main_class(module_path, dataset=True)
     name = builder_cls.BUILDER_CONFIGS[0].name if builder_cls.BUILDER_CONFIGS else None
     with tempfile.TemporaryDirectory() as temp_cache_dir:
         dataset = load_dataset(
             path, name=name, cache_dir=temp_cache_dir, download_mode=GenerateMode.FORCE_REDOWNLOAD
         )
         for split in dataset.keys():
             # assertGreater reports the actual length, and the message names the split.
             self.assertGreater(len(dataset[split]), 0, "split {!r} is empty".format(split))
         # NOTE(review): presumably drops the reference before the temporary
         # cache directory is removed on exit from the ``with`` block - confirm.
         del dataset
Пример #3
0
 def test_load_real_dataset_all_configs(self, dataset_name):
     """Load every configuration of a dataset and check no split is empty.

     The dataset script is prepared with ``force_download=True``. Each name
     declared in the builder class's ``BUILDER_CONFIGS`` is loaded in turn;
     when none are declared, a single load with ``name=None`` is performed.
     Every configuration is generated into its own temporary cache directory
     with ``GenerateMode.FORCE_REDOWNLOAD``.

     Args:
         dataset_name: Dataset identifier, passed unchanged to
             ``prepare_module`` and ``load_dataset``.
     """
     path = dataset_name
     # The second element of the returned pair is not needed by this test.
     module_path, _ = prepare_module(path, download_config=DownloadConfig(force_download=True), dataset=True)
     builder_cls = import_main_class(module_path, dataset=True)
     # An empty config list falls back to one run with name=None.
     config_names = [config.name for config in builder_cls.BUILDER_CONFIGS] or [None]
     for name in config_names:
         with tempfile.TemporaryDirectory() as temp_cache_dir:
             dataset = load_dataset(
                 path, name=name, cache_dir=temp_cache_dir, download_mode=GenerateMode.FORCE_REDOWNLOAD
             )
             for split in dataset.keys():
                 # assertGreater reports the actual length, and the message
                 # names the failing config and split.
                 self.assertGreater(
                     len(dataset[split]), 0, "split {!r} of config {!r} is empty".format(split, name)
                 )
             # NOTE(review): presumably drops the reference before the temporary
             # cache directory is removed on exit from the ``with`` block - confirm.
             del dataset
Пример #4
0
def get_builder(path, name, data_dir=None, cache_dir=None):
    """Instantiate the dataset builder defined by the script at ``path``.

    Args:
        path: Passed unchanged to ``prepare_module``.
        name: Forwarded to the builder constructor as ``name``.
        data_dir: Forwarded to the builder constructor as ``data_dir``.
        cache_dir: Forwarded to the builder constructor as ``cache_dir``.

    Returns:
        A new instance of the class returned by ``import_main_class``,
        constructed with the hash value reported by ``prepare_module``.
    """
    prepared = prepare_module(path, dataset=True, return_resolved_file_path=True)
    # The third element (the resolved file path) is requested but unused here.
    module_path, module_hash, _ = prepared
    builder_class = import_main_class(module_path, dataset=True)
    return builder_class(cache_dir=cache_dir, name=name, data_dir=data_dir, hash=module_hash)