def test_set_options():
    """Set ``database_directory`` twice and check what is read back.

    The first value read must equal the absolute, user-expanded form of the
    path that was set; after the second ``set`` the value must differ from
    the first one.
    """
    _config.set({'database_directory': '/tmp/collections'})
    first_dir = _config.get('database_directory')
    expected_dir = os.path.abspath(os.path.expanduser('/tmp/collections'))
    assert first_dir == expected_dir

    _config.set({'database_directory': '/tests/test_collections'})
    second_dir = _config.get('database_directory')
    assert not first_dir == second_dir
def test_to_xarray():
    """Open ``gmet_test``, search with ``direct_access=True`` and load it.

    The result of ``to_xarray`` must be an ``xr.Dataset`` that has a
    ``member_id`` coordinate.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='gmet_test')
        results = store.search(direct_access=True)
        dataset = results.to_xarray(chunks={'time': 1}, decode_times=True)
        assert isinstance(dataset, xr.Dataset)
        assert 'member_id' in dataset.coords
def test_build_collection_dict():
    """Build a ``cmip5`` collection from an in-memory definition dict.

    The opened metadata store must expose a pandas ``DataFrame`` as ``df``.
    """
    sample_location = {
        'direct_access': True,
        'exclude_dirs': ['*/files/*', 'latest'],
        'file_extension': '.nc',
        'loc_type': 'posix',
        'name': 'SAMPLE-DATA',
        'urlpath': './tests/sample_data/cmip/cmip5/output1/BNU/BNU-ESM',
    }
    bnu_esm_source = {
        'extra_attributes': {
            'activity': 'CMIP5',
            'institute': 'BNU',
            'product': 'output1',
        },
        'locations': [sample_location],
    }
    definition = {
        'collection_type': 'cmip5',
        'data_sources': {'BNU-ESM': bnu_esm_source},
        'name': 'cmip5_test_dict_collection',
    }

    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(
            collection_input_definition=definition,
            overwrite_existing=True,
        )
        assert isinstance(store.df, pd.DataFrame)
def test_search():
    """Search ``cmip5_test_collection`` for two models.

    The search result must carry an ``xr.Dataset`` in ``ds`` whose ``index``
    is non-empty.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cmip5_test_collection')
        results = store.search(model=['CanESM2', 'CSIRO-Mk3-6-0'])
        assert isinstance(results.ds, xr.Dataset)
        assert len(results.ds.index) > 0
def test_build_collection():
    """Build a collection from ``gmet-test.yml`` located next to this file.

    The opened metadata store must expose an ``xr.Dataset`` as ``ds``.
    """
    definition_path = os.path.join(here, 'gmet-test.yml')
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(
            collection_input_definition=definition_path,
            overwrite_existing=True,
        )
        assert isinstance(store.ds, xr.Dataset)
def test_search():
    """Search ``cmip5_test_collection`` for two models.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cmip5_test_collection')
        results = store.search(model=['CanESM2', 'CSIRO-Mk3-6-0'])
        hits = results.query_results
        assert isinstance(hits, pd.DataFrame)
        assert not results.query_results.empty
def test_search():
    """Search ``cmip6_test_collection`` by ``source_id``.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    wanted_sources = ['CNRM-ESM2-1', 'GISS-E2-1-G']
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='cmip6_test_collection')
        results = store.search(source_id=wanted_sources)
        assert isinstance(results.query_results, pd.DataFrame)
        assert not results.query_results.empty
def test_search():
    """Search ``mpige_test`` by component and stream.

    The search result must carry an ``xr.Dataset`` in ``ds`` whose ``index``
    is non-empty.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='mpige_test')
        results = store.search(component='mpiom', stream='monitoring_ym')
        assert isinstance(results.ds, xr.Dataset)
        assert len(results.ds.index) > 0
def test_search():
    """Search ``mpige_test`` by component and stream.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='mpige_test')
        results = store.search(component='mpiom', stream='monitoring_ym')
        assert isinstance(results.query_results, pd.DataFrame)
        assert not results.query_results.empty
def test_search():
    """Search ``gmet_test`` for members 1 and 2.

    The search result must carry an ``xr.Dataset`` in ``ds`` whose ``index``
    is non-empty.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='gmet_test')
        results = store.search(member_id=[1, 2])
        assert isinstance(results.ds, xr.Dataset)
        assert len(results.ds.index) > 0
def test_search():
    """Search ``AWS-CESM1-LE`` for two variables.

    The collection is opened with the module-level ``storage_options``; the
    ``index`` of the result's ``ds`` must be non-empty.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(
            collection_name='AWS-CESM1-LE',
            storage_options=storage_options,
        )
        results = store.search(variable=['RAIN', 'FSNO'])
        assert len(results.ds.index) > 0
def test_to_xarray_restore_non_coords(query):
    """Load ``cesm1-le`` data for ``query`` and check ``TAREA`` is a data variable.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded to ``search``.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cesm1-le')
        results = store.search(**query)
        datasets = results.to_xarray(decode_times=False)
        # Any one of the returned datasets is inspected; the key is not needed.
        _key, dataset = datasets.popitem()
        assert 'TAREA' in dataset.data_vars
def test_search():
    """Search ``cesm_dple_test_collection`` for ``O2`` with direct access.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cesm_dple_test_collection')
        results = store.search(variable='O2', direct_access=True)
        assert isinstance(results.query_results, pd.DataFrame)
        assert not results.query_results.empty
def test_search():
    """Search ``era5_test`` by short variable name and forecast initial date.

    The search result must carry an ``xr.Dataset`` in ``ds`` whose ``index``
    is non-empty.
    """
    short_names = ['mn2t', 'mx2t']
    initial_dates = ['2002-02-01', '2002-02-16']
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='era5_test')
        results = store.search(
            variable_short_name=short_names,
            forecast_initial_date=initial_dates,
        )
        assert isinstance(results.ds, xr.Dataset)
        assert len(results.ds.index) > 0
def test_build_collection_cesm1_aws_le():
    """Build a collection from the module-level ``cdef`` definition.

    The store is opened with the module-level ``storage_options`` and must
    expose an ``xr.Dataset`` as ``ds``.
    """
    open_kwargs = {
        'collection_input_definition': cdef,
        'overwrite_existing': True,
        'storage_options': storage_options,
    }
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(**open_kwargs)
        assert isinstance(store.ds, xr.Dataset)
def test_search():
    """Search ``era5_test`` by short variable name and forecast initial date.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    short_names = ['mn2t', 'mx2t']
    initial_dates = ['2002-02-01', '2002-02-16']
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='era5_test')
        results = store.search(
            variable_short_name=short_names,
            forecast_initial_date=initial_dates,
        )
        assert isinstance(results.query_results, pd.DataFrame)
        assert not results.query_results.empty
def test_to_xarray():
    """Load ``FSNO`` land data for experiment ``20C`` from ``AWS-CESM1-LE``.

    ``to_xarray`` returns a mapping; one popped value must be an
    ``xr.Dataset``.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(
            collection_name='AWS-CESM1-LE',
            storage_options=storage_options,
        )
        results = store.search(variable='FSNO', experiment='20C', component='lnd')
        datasets = results.to_xarray()
        _key, dataset = datasets.popitem()
        assert isinstance(dataset, xr.Dataset)
def test_open_collection_def_locally(filepath):
    """Load the collection input file at ``filepath`` with caching disabled.

    The loaded definition must be a non-empty ``dict``.

    Parameters
    ----------
    filepath
        Collection input file to load; presumably supplied by a
        parametrization over ``file_alias_dict`` (not visible here).
    """
    cache_root = f'{TMPDIR}/intake-esm-tests'
    settings = {
        'database-directory': './tests/test_collections',
        'data-cache-directory': cache_root,
    }
    with config.set(settings):
        definition = load_collection_input_file(filepath, cache=False)
        assert isinstance(definition, dict)
        assert len(definition) > 0
def test_search():
    """Search ``gmet_test`` by member and by three yearly time ranges.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    criteria = {
        'member_id': [1, 2],
        'time_range': ['19800101-19801231', '19810101-19811231', '19820101-19821231'],
    }
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='gmet_test')
        results = store.search(**criteria)
        assert isinstance(results.query_results, pd.DataFrame)
        assert not results.query_results.empty
def test_search():
    """Search ``cafe_test`` by frequency, short variable name and start date.

    ``query_results`` of the search must be a non-empty pandas ``DataFrame``.
    """
    criteria = {
        'frequency': 'day',
        'variable_short_name': ['ucomp', 'wt'],
        'start_date': ['1988-03-01'],
    }
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='cafe_test')
        results = store.search(**criteria)
        assert isinstance(results.query_results, pd.DataFrame)
        assert not results.query_results.empty
def test_search():
    """Search ``cmip6_test_collection`` for three sources and check result types.

    Checks that ``ds`` is an ``xr.Dataset`` with a non-empty ``index``, that
    ``df`` is a pandas ``DataFrame``, that ``nunique()`` returns a pandas
    ``Series`` and that ``unique()`` returns a ``dict``.
    """
    wanted_sources = ['BCC-ESM1', 'CNRM-CM6-1', 'CNRM-ESM2-1']
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='cmip6_test_collection')
        results = store.search(source_id=wanted_sources)

        assert isinstance(results.ds, xr.Dataset)
        assert len(results.ds.index) > 0
        assert isinstance(results.df, pd.DataFrame)
        assert isinstance(results.nunique(), pd.Series)
        assert isinstance(results.unique(), dict)
def test_to_xarray_cesm(chunks, expected_chunks):
    """Load ``cesm1-le`` data with ``chunks`` and check the ``SHF`` chunk size.

    Parameters
    ----------
    chunks
        Chunk specification forwarded to ``to_xarray``.
    expected_chunks
        Value that ``SHF``'s ``data.chunksize`` in the ``pop.h.ocn`` dataset
        must equal.
    """
    criteria = {
        'variable': ['STF_O2', 'SHF'],
        'ensemble': [1, 3, 9],
        'experiment': ['20C', 'RCP85'],
        'direct_access': True,
    }
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='cesm1-le')
        results = store.search(**criteria)
        datasets = results.to_xarray(chunks=chunks)
        ocean = datasets['pop.h.ocn']
        assert ocean['SHF'].data.chunksize == expected_chunks
def test_to_xarray_cmip(chunks, expected_chunks):
    """Load ``tasmax`` for several CMIP6 models and check its chunk size.

    Parameters
    ----------
    chunks
        Chunk specification forwarded to ``to_xarray``.
    expected_chunks
        Value that ``tasmax``'s ``data.chunksize`` must equal in one popped
        dataset.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cmip6_test_collection')

        # Query spanning multiple models.
        results = store.search(
            source_id=['CNRM-ESM2-1', 'CNRM-CM6-1', 'BCC-ESM1'],
            variable_id=['tasmax'],
        )
        datasets = results.to_xarray(decode_times=False, chunks=chunks)
        print(datasets)
        assert isinstance(datasets, dict)

        _key, dataset = datasets.popitem()
        assert dataset['tasmax'].data.chunksize == expected_chunks
def test_to_xarray_cmip(chunks, expected_chunks):
    """Load two CMIP6 variables across sources and check ``prra`` chunking.

    Parameters
    ----------
    chunks
        Chunk specification forwarded to ``to_xarray``.
    expected_chunks
        Value that ``prra``'s ``data.chunksize`` must equal in the
        ``NASA-GISS.GISS-E2-1-G.amip.Omon.gn`` dataset.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cmip6_test_collection')

        # Query spanning multiple institutions.
        results = store.search(
            source_id=['CNRM-ESM2-1', 'GISS-E2-1-G'],
            variable_id=['prra', 'tasmax'],
        )
        datasets = results.to_xarray(decode_times=False, chunks=chunks)
        print(datasets)
        assert isinstance(datasets, dict)

        nasa_dataset = datasets['NASA-GISS.GISS-E2-1-G.amip.Omon.gn']
        assert nasa_dataset['prra'].data.chunksize == expected_chunks
def test_to_xarray_cesm(chunks, expected_chunks):
    """Load ``cesm1-le`` data with ``chunks`` and check the ``SHF`` chunk size.

    Parameters
    ----------
    chunks
        Chunk specification forwarded to ``to_xarray``.
    expected_chunks
        Value that ``SHF``'s ``data.chunksize`` must equal in one popped
        dataset.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cesm1-le')
        results = store.search(
            variable=['STF_O2', 'SHF'],
            member_id=[1, 3, 9],
            experiment=['20C', 'RCP85'],
            direct_access=True,
        )
        datasets = results.to_xarray(chunks=chunks)
        _key, dataset = datasets.popitem()
        assert dataset['SHF'].data.chunksize == expected_chunks
def test_to_xarray_cmip_empty():
    """Expect ``to_xarray`` to raise ``ValueError`` for this CMIP5 query.

    NOTE(review): the test name suggests the query matches nothing in
    ``cmip5_test_collection`` -- confirm against the test data.
    """
    criteria = {
        'model': 'CanESM2',
        'experiment': 'rcp85',
        'frequency': 'mon',
        'modeling_realm': 'atmos',
        'ensemble_member': 'r2i1p1',
    }
    with config.set({'database-directory': './tests/test_collections'}):
        store = intake.open_esm_metadatastore(collection_name='cmip5_test_collection')
        results = store.search(**criteria)
        with pytest.raises(ValueError):
            results.to_xarray()
def test_to_xarray():
    """Load ``mpige_test`` data two ways and check the shape of each result.

    A default ``to_xarray()`` call must emit a ``UserWarning`` and return a
    ``dict``; a call with ``merge_exp=False`` on a wider query must return an
    object whose ``coords`` contain ``experiment_id``.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='mpige_test')

        single_component = store.search(component='mpiom', stream='monitoring_ym')
        with pytest.warns(UserWarning):
            loaded = single_component.to_xarray()
            assert isinstance(loaded, dict)

        multi_component = store.search(
            component=['mpiom', 'hamocc'],
            stream='monitoring_ym',
            experiment=['hist', 'rcp85'],
            ensemble=[2, 3],
        )
        loaded = multi_component.to_xarray(merge_exp=False)
        assert 'experiment_id' in loaded.coords
def test_to_xarray_cesm_hsi():
    """Open the HSI-transfer collection and load ``SST``.

    The collection definition is read from
    ``../ensure-file-hsi-transfer-collection-input.yml`` relative to ``here``
    without overwriting an existing collection; ``SST`` in one popped dataset
    must be an ``xr.DataArray``.
    """
    cache_root = f'{TMPDIR}/intake-esm-tests/transferred-data'
    settings = {
        'database-directory': './tests/test_collections',
        'data-cache-directory': cache_root,
    }
    with config.set(settings):
        definition_path = os.path.join(
            here, '../ensure-file-hsi-transfer-collection-input.yml'
        )
        store = intake.open_esm_metadatastore(
            collection_input_definition=definition_path,
            overwrite_existing=False,
        )
        results = store.search(variable=['SST'])
        datasets = results.to_xarray(chunks={'time': 365})
        _key, dataset = datasets.popitem()
        assert isinstance(dataset['SST'], xr.DataArray)
def test_to_xarray_cmip(chunks, expected_chunks):
    """Load monthly atmospheric ``hfls`` from ``cmip5_test_collection``.

    First a single-model query checks the chunk size of ``hfls``; then a
    query without a model filter checks that the result is a ``dict``
    containing the ``CCCma.CanCM4.historical.mon.atmos`` key.

    Parameters
    ----------
    chunks
        Chunk specification forwarded to ``to_xarray``.
    expected_chunks
        Value that ``hfls``'s ``data.chunksize`` must equal in the
        ``CNRM-CERFACS.CNRM-CM5.historical.mon.atmos`` dataset.
    """
    settings = {'database-directory': './tests/test_collections'}
    with config.set(settings):
        store = intake.open_esm_metadatastore(collection_name='cmip5_test_collection')

        single_model = store.search(
            variable=['hfls'],
            frequency='mon',
            modeling_realm='atmos',
            model=['CNRM-CM5'],
        )
        datasets = single_model.to_xarray(decode_times=True, chunks=chunks)
        cnrm = datasets['CNRM-CERFACS.CNRM-CM5.historical.mon.atmos']
        assert cnrm['hfls'].data.chunksize == expected_chunks

        # Query spanning multiple institutions.
        all_models = store.search(variable=['hfls'], frequency='mon', modeling_realm='atmos')
        datasets = all_models.to_xarray(decode_times=False, chunks=chunks)
        assert isinstance(datasets, dict)
        assert 'CCCma.CanCM4.historical.mon.atmos' in datasets.keys()
def test_file_transfer_symlink():
    """Check that ``_ensure_file_access`` yields local paths for the query.

    The collection is built from ``copy-to-cache-collection-input.yml`` with
    the data cache directory pointed at a location under ``TMPDIR``. The
    ``file_fullpath`` column of the returned results must convert to a
    non-empty list.

    The cache directory is removed in a ``finally`` clause so that a failing
    assertion (or an error while building the collection) does not leave it
    behind.
    """
    data_cache_dir = f'{TMPDIR}/intake-esm-tests/transferred-data'
    settings = {
        'database-directory': './tests/test_collections',
        'data-cache-directory': data_cache_dir,
    }
    try:
        with config.set(settings):
            collection_input_definition = os.path.join(
                here, 'copy-to-cache-collection-input.yml'
            )
            col = intake.open_esm_metadatastore(
                collection_input_definition=collection_input_definition,
                overwrite_existing=True,
            )
            cat = col.search(variable=['STF_O2', 'SHF'])

            query_results = _ensure_file_access(cat.ds)
            local_urlpaths = query_results['file_fullpath'].tolist()
            assert isinstance(local_urlpaths, list)
            assert len(local_urlpaths) > 0
    finally:
        # ignore_errors: the failure may have happened before the directory
        # was ever created, and a cleanup error must not mask the real one.
        shutil.rmtree(data_cache_dir, ignore_errors=True)