def test_nc_wrong_concat_var():
    s1 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "lon", "time"],
        variables=["a", "b"],
        coord_values=dict(time=[1, 2]),
    )
    ds1 = s1.to_xarray()

    s2 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "time"],
        variables=["a", "b"],
        coord_values=dict(time=[8, 9]),
    )
    ds2 = s2.to_xarray()

    target = xr.concat([ds1, ds2], dim="time")
    print(target)

    ds = load_source("multi", [s1, s2], merger="concat(concat_dim=time)")
    ds.graph()
    merged = ds.to_xarray()
    assert target.identical(merged)

def test_bbox():
    area = [30.0, 2.0, 3.0, 4.0]
    bbox = BoundingBox(north=30, west=2, south=3, east=4)

    assert bbox_list(None, area) == bbox
    assert bbox_list(area=area, ignore=None) == bbox

    assert bbox_tuple(area) == tuple(area)
    assert bbox_tuple(area=area) == tuple(area)

    assert bbox_bbox(area) == area
    assert bbox_dict(area) == dict(north=30, west=2, south=3, east=4)
    assert bbox_defaults(area) == bbox

    source = load_source("file", climetlab_file("docs/examples/test.grib"))
    assert bbox_tuple(source[0]) == (73.0, -27.0, 33.0, 45.0)

    source = load_source("file", climetlab_file("docs/examples/test.nc"))
    assert bbox_tuple(source[0]) == (73.0, -27.0, 33.0, 45.0)

    assert bbox_tuple("france") == (54.5, -6.0, 39.0, 9.5)
    assert bbox_tuple("verification.france") == (51.5, -5.0, 42.0, 8.5)

def test_nc_merge_var():
    s1 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "lon", "time"],
        variables=["a", "b"],
    )
    ds1 = s1.to_xarray()

    s2 = load_source(
        "dummy-source",
        kind="netcdf",
        dims=["lat", "lon", "time"],
        variables=["c", "d"],
    )
    ds2 = s2.to_xarray()

    target = xr.merge([ds1, ds2])

    ds = load_source("multi", [s1, s2])
    ds.graph()
    merged = ds.to_xarray()
    assert target.identical(merged)

    target2 = xr.open_mfdataset([s1.path, s2.path])
    assert target2.identical(merged)

def test_zenodo_error_2():
    with pytest.raises(ValueError, match=r"Invalid zenodo key.*"):
        cml.load_source(
            "zenodo",
            record_id=4707154,
            file_key="unknown_",
        )

def _load(self, variable, period, domain=None, time=None, grid=None):
    self.variable = variable

    request = dict(
        variable=self.variable,
        product_type="reanalysis",
    )

    if domain is not None:
        request["area"] = domain_to_area(domain)

    if time is not None:
        request["time"] = time
    else:
        request["time"] = list(range(0, 24))

    if grid is not None:
        if isinstance(grid, (int, float)):
            request["grid"] = [grid, grid]
        else:
            request["grid"] = grid

    if isinstance(period, int):
        period = (period, period)

    if isinstance(period, (tuple, list)) and len(period) == 1:
        period = (period[0], period[0])

    sources = []
    for year in range(period[0], period[1] + 1):
        request["year"] = year
        sources.append(load_source("cds", "reanalysis-era5-single-levels", **request))

    self.source = load_source("multi", sources)

def test_multi():
    if not os.path.exists(os.path.expanduser("~/.cdsapirc")):
        pytest.skip("No ~/.cdsapirc")

    s1 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-01",
        format="netcdf",
    )
    s1.to_xarray()

    s2 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-02",
        format="netcdf",
    )
    s2.to_xarray()

    source = load_source("multi", s1, s2)
    for s in source:
        print(s)

    source.to_xarray()

def xtest_multi_grib():
    ds = load_source(
        "multi",
        load_source("dummy-source", kind="grib", date=20000101),
        load_source("dummy-source", kind="grib", date=20000102),
    )
    assert len(ds) == 2

def test_multi():
    s1 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-01",
        format="netcdf",
    )
    print(s1.to_xarray())

    s2 = load_source(
        "cds",
        "reanalysis-era5-single-levels",
        product_type="reanalysis",
        param="2t",
        date="2021-03-02",
        format="netcdf",
    )
    print(s2.to_xarray())

    source = load_source("multi", s1, s2)
    for s in source:
        print(s)

    print(source.to_xarray())

def test_part_url():
    ds = load_source(
        "url",
        "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
    )

    ds = load_source(
        "url",
        "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
        parts=((0, 4),),
    )
    assert os.path.getsize(ds.path) == 4
    with open(ds.path, "rb") as f:
        assert f.read() == b"BUFR"

    ds = load_source(
        "url",
        "http://download.ecmwf.int/test-data/metview/gallery/temp.bufr",
        parts=((0, 10), (50, 10), (60, 10)),
    )
    print(ds.path)
    assert os.path.getsize(ds.path) == 30
    with open(ds.path, "rb") as f:
        assert f.read()[:4] == b"BUFR"

def _concat_var_different_coords_1(kind1, kind2):
    s1 = load_source(
        "dummy-source",
        kind=kind1,
        variables=["a"],
        dims=["lat", "lon", "time"],
        coord_values=dict(time=[1, 3]),
    )
    ds1 = s1.to_xarray()

    s2 = load_source(
        "dummy-source",
        kind=kind2,
        variables=["a"],
        dims=["lat", "lon", "time"],
        coord_values=dict(time=[2, 4]),
    )
    ds2 = s2.to_xarray()

    target = xr.concat([ds1, ds2], dim="time")
    # print(target)

    ds = load_source("multi", [s1, s2], merger="concat(concat_dim=time)")
    ds.graph()
    merged = ds.to_xarray()
    assert target.identical(merged), f"Concat failed for {kind1}, {kind2}"

def retrieve_and_check(index, request, range_method=None, **kwargs):
    print("--------")
    # parts = index.lookup_request(request)
    print("range_method", range_method)
    print("REQUEST", request)
    # for url, p in parts:
    #     total = len(index.get_backend(url).entries)
    #     print(f"PARTS: {len(p)}/{total} parts in {url}")

    now = time.time()
    s = load_source("indexed-urls", index, request, range_method=range_method, **kwargs)
    elapsed = time.time() - now
    print("ELAPSED", elapsed)

    try:
        paths = [s.path]
    except AttributeError:
        paths = [p.path for p in s.sources]

    for path in paths:
        # check that the downloaded gribs match the request
        for grib in load_source("file", path):
            for k, v in request.items():
                if k == "param":
                    k = "shortName"
                assert check_grib_value(grib._get(k), v), (grib._get(k), v)

    return elapsed

def test_url_ftp_source_anonymous():
    date = datetime.datetime.now() - datetime.timedelta(days=1)
    load_source(
        "url-pattern",
        (
            "ftp://ftp.ncep.noaa.gov/pub/data/nccf/com/gfs/prod/"
            "gfs.{date:date(%Y%m%d)}/00/atmos/wafsgfs_P_t00z_intdsk84.grib2"
        ),
        {"date": date},
    )

def test_multi_grib_mixed():
    ds = load_source(
        "multi",
        load_source("dummy-source", kind="grib", date=20000101),
        load_source("dummy-source", kind="grib", date=20000102),
        load_source("dummy-source", kind="unknown", hello="world"),
    )
    assert len(ds) == 2

def test_multi_grib():
    ds = load_source(
        "multi",
        load_source("dummy-source", kind="grib", date=20000101),
        load_source("dummy-source", kind="grib", date=20000102),
    )
    assert len(ds) == 2
    ds.to_xarray()
    ds.statistics()

def test_a():
    s = cml.load_source("local", os.path.join(here, "gribs", "a"), param="t")
    s = cml.load_source(
        "local", os.path.join(here, "gribs", "a"), param="t", level="1000"
    )
    print(s, len(s))
    ds = s.to_xarray()
    print(ds)

def test_multi_missing_url():
    from requests.exceptions import HTTPError

    with pytest.raises(HTTPError, match=".*this-file-does-not-exists.*"):
        load_source(
            "url-pattern",
            "http://download.ecmwf.int/test-data/metview/gallery/{x}",
            x=["temp.bufr", "this-file-does-not-exists.bufr"],
        )

def test_tfdataset_grib_4():
    s = cml.load_source(
        "multi",
        cml.load_source("file", climetlab_file("docs/examples/test.grib")),
        cml.load_source("file", climetlab_file("docs/examples/test.grib")),
    )
    dataset = s.to_tfdataset(label="paramId")
    for r in dataset:
        print(len(r), [type(x) for x in r])

def test_multi_graph_2():
    with temp_directory() as tmpdir:
        os.mkdir(os.path.join(tmpdir, "a1"))
        a11 = load_source("dummy-source", kind="grib", date=20000101)
        a11.save(os.path.join(tmpdir, "a1", "a11.grib"))
        a12 = load_source("dummy-source", kind="grib", date=20000102)
        a12.save(os.path.join(tmpdir, "a1", "a12.grib"))

        os.mkdir(os.path.join(tmpdir, "b1"))
        b11 = load_source("dummy-source", kind="grib", date=20000103)
        b11.save(os.path.join(tmpdir, "b1", "b11.grib"))
        b12 = load_source("dummy-source", kind="grib", date=20000104)
        b12.save(os.path.join(tmpdir, "b1", "b12.grib"))

        os.mkdir(os.path.join(tmpdir, "a2"))
        a21 = load_source("dummy-source", kind="grib", date=20000105)
        a21.save(os.path.join(tmpdir, "a2", "a21.grib"))
        a22 = load_source("dummy-source", kind="grib", date=20000106)
        a22.save(os.path.join(tmpdir, "a2", "a22.grib"))

        os.mkdir(os.path.join(tmpdir, "b2"))
        b21 = load_source("dummy-source", kind="grib", date=20000107)
        b21.save(os.path.join(tmpdir, "b2", "b21.grib"))
        b22 = load_source("dummy-source", kind="grib", date=20000108)
        b22.save(os.path.join(tmpdir, "b2", "b22.grib"))

        def filter(path_or_url):
            # keep only the files whose name ends with "2.grib" (four of the eight saved)
            return path_or_url.endswith("2.grib")

        ds = load_source("file", tmpdir, filter=filter)
        ds.graph()
        assert len(ds) == 4

def test_glob():
    s = load_source("file", climetlab_file("docs/examples/test.grib"))
    with temp_directory() as tmpdir:
        s.save(os.path.join(tmpdir, "a.grib"))
        s.save(os.path.join(tmpdir, "b.grib"))

        s = load_source("file", os.path.join(tmpdir, "*.grib"))
        assert len(s) == 4, len(s)

        s = load_source("file", tmpdir)
        assert len(s) == 4, len(s)

def test_dates():
    npdate = np.datetime64("2016-01-01")
    assert dates_1(date=npdate) == [datetime.datetime(2016, 1, 1)]

    source = load_source("file", "docs/examples/test.grib")
    assert dates_1(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]

    source = load_source("file", "docs/examples/test.nc")
    # For now
    with pytest.raises(NotImplementedError):
        assert dates_1(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]

def test_nc_merge_concat_var():
    target, a1, a2, b1, b2 = get_hierarchy()

    s = load_source(
        "multi",
        [
            load_source("multi", [a1, b1], merger="merge()"),
            load_source("multi", [a2, b2], merger="merge()"),
        ],
        merger="concat(dim=forecast_time)",
    )
    merged = s.to_xarray()
    assert target.identical(merged)

def test_extension():
    s = load_source(
        "url",
        f"{TEST_DATA_URL}/fixtures/tfrecord/EWCTest0.0.tfrecord",
    )
    assert s.path.endswith(".0.tfrecord")

    s = load_source(
        "url",
        f"{TEST_DATA_URL}/fixtures/tfrecord/EWCTest0.1.tfrecord",
    )
    assert s.path.endswith(".1.tfrecord")

def test_user_2():
    s = load_source("file", climetlab_file("docs/examples/test.grib"))
    home_file = os.path.expanduser("~/.climetlab/test.grib")
    try:
        s.save(home_file)
        # Test expand vars
        s = load_source("file", "$HOME/.climetlab/test.grib", expand_vars=True)
        assert len(s) == 2
    finally:
        try:
            os.unlink(home_file)
        except OSError:
            LOG.exception("unlink(%s)", home_file)

def test_cache_2():
    with temp_directory() as tmpdir:
        with settings.temporary():
            settings.set("cache-directory", tmpdir)
            settings.set("maximum-cache-size", "5MB")
            settings.set("number-of-download-threads", 5)

            assert cache_size() == 0

            load_source(
                "url-pattern",
                "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin",
                {
                    "n": [0, 1, 2, 3, 4],
                },
            )
            assert cache_size() == 5 * 1024 * 1024, cache_size()

            cnt = 0
            for i, f in enumerate(cache_entries()):
                print("FILE", i, f)
                cnt += 1
            assert cnt == 5, f"Files in cache database: {cnt}"

            # Downloading five more 1MB files must not grow the cache beyond
            # the 5MB limit: the earlier entries are expected to be evicted.
            load_source(
                "url-pattern",
                "https://get.ecmwf.int/test-data/climetlab/1mb-{n}.bin",
                {
                    "n": [5, 6, 7, 8, 9],
                },
            )
            assert cache_size() == 5 * 1024 * 1024, cache_size() / 1024.0 / 1024.0

            cnt = 0
            for i, f in enumerate(cache_entries()):
                print("FILE", i, f)
                cnt += 1
            assert cnt == 5, f"Files in cache database: {cnt}"

            cnt = 0
            for n in os.listdir(tmpdir):
                if n.startswith("cache-") and n.endswith(".db"):
                    continue
                cnt += 1
            assert cnt == 5, f"Files in cache directory: {cnt}"

def test_multi_directory_1():
    with temp_directory() as directory:
        for date in (20000101, 20000102):
            ds = load_source("dummy-source", kind="grib", date=date)
            ds.save(os.path.join(directory, f"{date}.grib"))

        ds = load_source("file", directory)
        print(ds)
        assert len(ds) == 2
        ds.graph()

        with temp_file() as filename:
            ds.save(filename)
            ds = load_source("file", filename)
            assert len(ds) == 2

def test_normalize_dates_from_source():
    dates_3 = normalize("d", "date")(f)
    dates_list_3 = normalize("d", "date", multiple=True)(f)

    source = load_source("file", climetlab_file("docs/examples/test.grib"))
    assert dates_3(source[0]) == datetime.datetime(2020, 5, 13, 12, 0)
    assert dates_list_3(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]

    source = load_source("file", climetlab_file("docs/examples/test.nc"))
    # For now
    with pytest.raises(NotImplementedError):
        assert dates_3(source[0]) == datetime.datetime(2020, 5, 13, 12, 0)
        assert dates_list_3(source[0]) == [datetime.datetime(2020, 5, 13, 12, 0)]

def test_zenodo_1():
    ds = cml.load_source("zenodo", record_id=5020468, filter=only_csv)
    pd = ds.to_pandas()
    print(pd)
    assert len(pd) == 49

def test_download_zip_1():
    ds = load_source(
        "url",
        f"{TEST_DATA_URL}/input/grib.zip",
    )
    assert len(ds) == 6, len(ds)

def test_download_zip_1():
    ds = load_source(
        "url",
        "https://get.ecmwf.int/repository/test-data/climetlab/grib.zip",
    )
    assert len(ds) == 6, len(ds)

def test_unknown_reader():
    s = cml.load_source(
        "file",
        os.path.join(os.path.dirname(__file__), "unknown_file.unknown_ext"),
    )
    print(s)
    assert isinstance(s._reader, cml.readers.unknown.Unknown)