def test_load_20ng():
    """Fetch 20NewsGroup twice, checking sizes and that a cache file appears.

    Any existing cache file is deleted first so the first fetch starts
    without one. After that fetch the corpus and the label list must both
    hold 16309 entries and the cache file must exist on disk. A second fetch
    on a fresh ``Dataset`` must yield a corpus of the same size.
    """
    expected_size = 16309

    root_dir = get_data_home(data_home=None)
    cached_file = _pkl_filepath(root_dir, "20NewsGroup" + ".pkz")

    # Start from a clean slate: drop any cache left by an earlier run.
    if os.path.exists(cached_file):
        os.remove(cached_file)

    first_load = Dataset()
    first_load.fetch_dataset("20NewsGroup")
    assert len(first_load.get_corpus()) == expected_size
    assert len(first_load.get_labels()) == expected_size
    assert os.path.exists(cached_file)

    # NOTE(review): presumably this second fetch is served from the cache
    # file checked above -- confirm against Dataset.fetch_dataset.
    second_load = Dataset()
    second_load.fetch_dataset("20NewsGroup")
    assert len(second_load.get_corpus()) == expected_size
def test_load_M10():
    """Fetch the M10 dataset and check it has exactly ten distinct labels."""
    m10 = Dataset()
    m10.fetch_dataset("M10")

    distinct_labels = set(m10.get_labels())
    assert len(distinct_labels) == 10