def test_csv_pandas_header(self, tmp_path, noaa_jfk_schema):
    """Exercise the ``no_header`` option of the ``jfk_weather_cleaned`` CSV format.

    Three situations are covered, in this order:

    1. ``no_header`` is ``True``: ``Dataset.load()`` is expected to raise ``ValueError``.
    2. ``no_header`` is ``False``, ``''`` or ``None``: each value must pass the checks in
       ``test_csv_pandas_loader``.
    3. ``no_header`` is removed from the options: ``test_csv_pandas_loader`` must pass as well.
    """

    def format_options():
        # Walk the schema afresh on every use, so each access sees whatever mapping is
        # currently stored under these keys.
        return noaa_jfk_schema['subdatasets']['jfk_weather_cleaned']['format']['options']

    format_options()['no_header'] = True
    headerless = Dataset(noaa_jfk_schema, tmp_path, mode=Dataset.InitializationMode.DOWNLOAD_ONLY)
    # Pandas is expected to fail while converting string content to another dtype.
    with pytest.raises(ValueError) as exinfo:
        headerless.load()
    error_text = str(exinfo.value)
    assert 'could not convert string to float' in error_text
    headerless.delete()

    # Every one of these values should be treated the same as ``False``.
    for falsy_value in (False, '', None):
        format_options()['no_header'] = falsy_value
        self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)

    # Finally, drop the option entirely and run the regular loader checks once more.
    del format_options()['no_header']
    self.test_csv_pandas_loader(tmp_path, noaa_jfk_schema)
def test_loading_undownloaded(self, tmp_path, gmb_schema):
    """Check the errors raised when loading before ``Dataset.download()`` has been called.

    The dataset is created in ``LAZY`` mode on an empty ``tmp_path``. The test then verifies:

    - ``load(check=False)`` raises ``FileNotFoundError`` with the expected hint;
    - ``dataset._data`` is ``None`` afterwards;
    - accessing ``dataset.data`` raises ``RuntimeError`` with the expected message;
    - ``load(check=True)`` raises ``RuntimeError`` naming the data directory.
    """
    dataset = Dataset(gmb_schema, data_dir=tmp_path, mode=Dataset.InitializationMode.LAZY)

    # Loading without the integrity check should fail on the missing files.
    with pytest.raises(FileNotFoundError) as missing_files:
        dataset.load(check=False)
    expected_hint = ('Failed to load subdataset "gmb_subset_full" because some files are not found. '
                     'Did you forget to call Dataset.download()?\nCaused by:\n')
    assert expected_hint in str(missing_files.value)

    # Half-loaded data objects should have been reset to None.
    assert dataset._data is None
    with pytest.raises(RuntimeError) as not_loaded:
        dataset.data  # the attribute access itself is expected to raise
    expected_not_loaded = ('Data has not been downloaded and/or loaded yet. Call Dataset.download() to download '
                           'data, call Dataset.load() to load data.')
    assert str(not_loaded.value) == expected_not_loaded

    # Forcing the check on a dataset that was never downloaded should error too.
    with pytest.raises(RuntimeError) as failed_check:
        dataset.load(check=True)
    expected_failed_check = f'Downloaded data files are not present in {dataset._data_dir_} or are corrupted.'
    assert str(failed_check.value) == expected_failed_check