def test_auto_drop_ocean_regions(in_regions, safe, out_regions, tmpdir, caplog, test_data_cmip6output_dir):
    """Crunch gpp (land) data and check how ocean-related regions are handled.

    When the requested regions are not "safe" for land data, the cruncher
    should warn that ocean regions are dropped and the output should only
    contain ``out_regions``.
    """
    output_dir = str(tmpdir)
    crunch_contact = "join-files-test"

    runner = CliRunner(mix_stderr=False)
    with caplog.at_level("DEBUG"):
        result = runner.invoke(
            crunch_data,
            [
                test_data_cmip6output_dir,
                output_dir,
                crunch_contact,
                "--regexp",
                ".*gpp.*",
                "--drs",
                "CMIP6Output",
                "--small-number-workers",
                1,
                "--regions",
                ",".join(in_regions),
            ],
        )
    assert result.exit_code == 0

    key_phrase = "Detected land data, dropping ocean related regions so regions to crunch are now: {}".format(
        out_regions
    )
    if safe:
        # all requested regions were valid for land data, no warning expected
        assert key_phrase not in result.stderr, result.stderr
    else:
        assert key_phrase in result.stderr, result.stderr

    # every crunched file should only contain the surviving regions
    for out_file in glob(join(output_dir, "**", "*.nc"), recursive=True):
        res = load_scmdataframe(out_file)
        assert sorted(res["region"].unique()) == sorted(out_regions)
def test_crunching_join_files(tmpdir, caplog, test_data_cmip6output_dir):
    """Check that crunching joins multiple input files into one output.

    The resulting file's time axis should span the full 2840-01 to 2859-12
    period covered by the separate input files.
    """
    input_dir = join(
        test_data_cmip6output_dir,
        "CMIP6",
        "CMIP",
        "IPSL",
        "IPSL-CM6A-LR",
        "piControl",
        "r1i1p1f1",
        "Amon",
        "tas",
        "gr",
        "v20181123",
    )
    output_dir = str(tmpdir)
    crunch_contact = "join-files-test"

    runner = CliRunner(mix_stderr=False)
    with caplog.at_level("DEBUG"):
        result = runner.invoke(
            crunch_data,
            [
                input_dir,
                output_dir,
                crunch_contact,
                "--drs",
                "CMIP6Output",
                "-f",
                "--small-threshold",
                0,
                "--medium-number-workers",
                1,
            ],
        )
    assert result.exit_code == 0
    assert "netcdf-scm: {}".format(netcdf_scm.__version__) in caplog.messages

    expected_file = join(
        output_dir,
        "netcdf-scm-crunched",
        "CMIP6",
        "CMIP",
        "IPSL",
        "IPSL-CM6A-LR",
        "piControl",
        "r1i1p1f1",
        "Amon",
        "tas",
        "gr",
        "v20181123",
        "netcdf-scm_tas_Amon_IPSL-CM6A-LR_piControl_r1i1p1f1_gr_284001-285912.nc",
    )
    assert isfile(expected_file)

    crunched_data = load_scmdataframe(expected_file)
    assert crunched_data.metadata["crunch_contact"] == crunch_contact
    # joined output covers the entire period, not just one input file
    assert crunched_data["time"].min() == dt.datetime(2840, 1, 16, 12)
    assert crunched_data["time"].max() == dt.datetime(2859, 12, 16, 12)
def test_save_cube_and_load_scmdataframe(tmpdir, test_cmip6_output_file):
    """Round-trip SCM timeseries cubes through ``save_netcdf_scm_nc`` and ``load_scmdataframe``.

    Checks that metadata columns, spot values for a few regions/times and
    file-level metadata all survive the save/load cycle.
    """
    base = CMIP6OutputCube()
    base.load_data_from_path(test_cmip6_output_file)
    out_file = os.path.join(tmpdir, "test_save_file.nc")
    save_netcdf_scm_nc(base.get_scm_timeseries_cubes(), out_file)

    loaded = load_scmdataframe(out_file)
    assert (loaded["scenario"] == "1pctCO2").all()
    assert (loaded["climate_model"] == "BCC-CSM2-MR").all()
    assert (loaded["variable"] == "rlut").all()
    assert (loaded["variable_standard_name"] == "toa_outgoing_longwave_flux").all()
    assert (loaded["unit"] == "W m^-2").all()
    # NOTE(review): the original asserted activity_id twice; the duplicate
    # assertion has been removed
    assert (loaded["activity_id"] == "CMIP").all()
    assert (loaded["member_id"] == "r1i1p1f1").all()
    assert (loaded["mip_era"] == "CMIP6").all()

    # spot check values in a few regions and times
    _assert_scm_dataframe(loaded, 236.569464, region="World", year=1859, month=12)
    _assert_scm_dataframe(loaded, 243.072575, region="World|Ocean", year=1856, month=10)
    _assert_scm_dataframe(
        loaded, 235.025871, region="World|Southern Hemisphere", year=1853, month=6
    )
    _assert_scm_dataframe(
        loaded,
        234.333421,
        region="World|Southern Hemisphere|Land",
        year=1850,
        month=1,
    )

    # file-level metadata
    assert loaded.metadata[
        "crunch_netcdf_scm_version"
    ] == "{} (more info at github.com/znicholls/netcdf-scm)".format(
        netcdf_scm.__version__
    )
    assert (
        loaded.metadata["institution"]
        == "Beijing Climate Center, Beijing 100081, China"
    )
    assert loaded.metadata["title"] == "BCC-CSM2-MR output prepared for CMIP6"
    np.testing.assert_allclose(
        loaded.metadata["land_fraction_northern_hemisphere"], 0.38681185060261924
    )
    assert (
        loaded.metadata["source"]
        == "BCC-CSM 2 MR (2017): aerosol: none atmos: BCC_AGCM3_MR (T106; 320 x 160 longitude/latitude; 46 levels; top level 1.46 hPa) atmosChem: none land: BCC_AVIM2 landIce: none ocean: MOM4 (1/3 deg 10S-10N, 1/3-1 deg 10-30 N/S, and 1 deg in high latitudes; 360 x 232 longitude/latitude; 40 levels; top grid cell 0-10 m) ocnBgchem: none seaIce: SIS2"
    )
def test_load_scmdataframe(test_data_netcdfscm_nc_file):
    """Load a pre-crunched netCDF-SCM file and verify its contents and metadata."""
    loaded = load_scmdataframe(test_data_netcdfscm_nc_file)

    # metadata columns
    assert (loaded["scenario"] == "rcp45").all()
    assert (loaded["climate_model"] == "ACCESS1-0").all()
    assert (loaded["variable"] == "tas").all()
    assert (loaded["variable_standard_name"] == "air_temperature").all()
    assert (loaded["unit"] == "K").all()
    assert (loaded["member_id"] == "r1i1p1").all()
    assert (loaded["mip_era"] == "CMIP5").all()
    assert (loaded["activity_id"] == "cmip5").all()

    # spot check values in a few regions and times
    _assert_scm_dataframe(loaded, 285.521667, region="World", year=2006, month=1)
    _assert_scm_dataframe(loaded, 279.19043, region="World|Land", year=2019, month=3)
    _assert_scm_dataframe(
        loaded, 287.103729, region="World|Northern Hemisphere", year=2032, month=11
    )
    _assert_scm_dataframe(
        loaded,
        290.850189,
        region="World|Northern Hemisphere|Ocean",
        year=2049,
        month=12,
    )

    # file-level metadata
    assert (
        loaded.metadata["crunch_netcdf_scm_version"]
        == "1.0.0+97.g6d5c5ae (more info at github.com/znicholls/netcdf-scm)"
    )
    assert (
        loaded.metadata["institution"]
        == "CSIRO (Commonwealth Scientific and Industrial Research Organisation, Australia), and BOM (Bureau of Meteorology, Australia)"
    )
    assert (
        loaded.metadata["title"] == "ACCESS1-0 model output prepared for CMIP5 RCP4.5"
    )
    np.testing.assert_allclose(
        loaded.metadata["land_fraction_northern_hemisphere"], 0.38912639
    )
def _do_comparison(res, expected, update=False):
    """Run test that crunched files are unchanged

    Parameters
    ----------
    res : str
        Directory written as part of the test

    expected : str
        Directory against which the comparison should be done

    update : bool
        If True, don't perform the test and instead simply overwrite the
        ``expected`` with ``res``

    Raises
    ------
    AssertionError
        If ``update`` is ``False`` and ``res`` and ``expected`` are not
        identical.
    """
    # walk both directions so files missing from either side are caught
    paths_to_walk = [res] if update else [expected, res]
    for p in paths_to_walk:
        for dirpath, _, filenames in walk(p):
            if not filenames:
                continue
            if update:
                # make sure the mirror directory exists before copying into it
                path_to_check = dirpath.replace(res, expected)
                if not path.exists(path_to_check):
                    makedirs(path_to_check)
            for f in filenames:
                base_f = join(dirpath, f)
                comparison_p = expected if p == res else res
                comparison_f = base_f.replace(p, comparison_p)
                assert base_f != comparison_f
                if update:
                    print("Updating {}".format(comparison_f))
                    shutil.copy(base_f, comparison_f)
                else:
                    try:
                        base_scmdf = load_scmdataframe(base_f)
                        comparison_scmdf = load_scmdataframe(comparison_f)
                        assert_scmdata_frames_allclose(base_scmdf, comparison_scmdf)
                    except NotImplementedError:
                        # 3D data cannot be loaded as an scmdataframe so
                        # compare the raw cubes region by region instead
                        base_cubes = iris.load(base_f)
                        comparison_cubes = iris.load(comparison_f)
                        for comparison_cube in comparison_cubes:
                            region = comparison_cube.attributes["region"]
                            for base_cube in base_cubes:
                                if base_cube.attributes["region"] == region:
                                    break
                            else:
                                # fix: previously a missing region silently
                                # fell through and compared against whatever
                                # cube happened to be last in the list
                                raise AssertionError(
                                    "No cube with region {} in {}".format(
                                        region, base_f
                                    )
                                )
                            np.testing.assert_allclose(
                                base_cube.data, comparison_cube.data
                            )
                            base_cube.attributes.pop("crunch_netcdf_scm_version")
                            comparison_cube.attributes.pop(
                                "crunch_netcdf_scm_version"
                            )
                            assert base_cube.attributes == comparison_cube.attributes
    if update:
        pytest.skip("Updated {}".format(expected))
def test_crunching_arguments(tmpdir, caplog, test_data_marble_cmip5_dir):
    """Check that CLI options are echoed in the logs and respected when crunching."""
    input_dir = test_data_marble_cmip5_dir
    output_dir = str(tmpdir)
    var_to_crunch = ".*fco2antt.*"
    data_sub_dir = "custom-name"
    crunch_contact = "test crunch contact info <email>"

    runner = CliRunner()
    with caplog.at_level("INFO"):
        result = runner.invoke(
            crunch_data,
            [
                input_dir,
                output_dir,
                crunch_contact,
                "--drs",
                "MarbleCMIP5",
                "--regexp",
                var_to_crunch,
                "--data-sub-dir",
                data_sub_dir,
                "-f",
                "--small-threshold",
                0,
                "--medium-threshold",
                0.5,
            ],
        )
    assert result.exit_code == 0

    # every option (explicit or defaulted) should appear in the logs
    assert "netcdf-scm: {}".format(netcdf_scm.__version__) in caplog.text
    assert "crunch-contact: {}".format(crunch_contact) in caplog.text
    assert "source: {}".format(input_dir) in caplog.text
    assert "destination: {}".format(output_dir) in caplog.text
    assert "drs: MarbleCMIP5" in caplog.text
    assert "regexp: {}".format(var_to_crunch) in caplog.text
    assert "regions: World,World|Northern Hemisphere" in caplog.text
    assert "force: True" in caplog.text
    assert "small_number_workers: 10" in caplog.text
    assert "small_threshold: 0" in caplog.text
    assert "medium_number_workers: 3" in caplog.text
    assert "medium_threshold: 0.5" in caplog.text
    assert "force_lazy_threshold: 1000" in caplog.text
    assert (
        "Crunching 1 directories with greater than or equal to 0.5 million data points"
        in caplog.text
    )
    assert (
        "Making output directory: {}/custom-name".format(output_dir)
        in caplog.messages
    )
    # only the fco2antt directories should match the regexp
    assert "Attempting to process: ['fco2antt" in caplog.text
    assert "Attempting to process: ['tas" not in caplog.text

    assert isdir(join(output_dir, data_sub_dir, "cmip5"))
    out_file = join(
        output_dir,
        data_sub_dir,
        "cmip5",
        "1pctCO2",
        "Amon",
        "fco2antt",
        "CanESM2",
        "r1i1p1",
        "netcdf-scm_fco2antt_Amon_CanESM2_1pctCO2_r1i1p1_198001-198912.nc",
    )
    assert isfile(out_file)

    loaded = load_scmdataframe(out_file)
    assert (loaded["scenario"] == "1pctCO2").all()
    assert (loaded["climate_model"] == "CanESM2").all()
    assert (loaded["variable"] == "fco2antt").all()
    assert (
        loaded["variable_standard_name"]
        == "tendency_of_atmosphere_mass_content_of_carbon_dioxide_expressed_as_carbon_due_to_anthropogenic_emission"
    ).all()
    assert (loaded["unit"] == "kg m^-2 s^-1").all()
    assert (loaded["member_id"] == "r1i1p1").all()
    assert (loaded["mip_era"] == "CMIP5").all()
    assert (loaded["activity_id"] == "cmip5").all()
    assert sorted(loaded["region"].unique()) == sorted(
        [
            "World",
            "World|Land",
            "World|Ocean",
            "World|Northern Hemisphere",
            "World|Northern Hemisphere|Land",
            "World|Northern Hemisphere|Ocean",
            "World|Southern Hemisphere",
            "World|Southern Hemisphere|Land",
            "World|Southern Hemisphere|Ocean",
        ]
    )
    # file is entirely zeros...
    np.testing.assert_allclose(loaded.timeseries().values, 0)

    # re-run without force: existing output should be skipped, not rewritten
    caplog.clear()
    with caplog.at_level("INFO"):
        result_skip = runner.invoke(
            crunch_data,
            [
                input_dir,
                output_dir,
                "test",
                "--drs",
                "MarbleCMIP5",
                "--regexp",
                var_to_crunch,
                "--data-sub-dir",
                data_sub_dir,
                "--small-number-workers",
                1,
            ],
        )
    assert result_skip.exit_code == 0

    skip_str = "Skipped (already exists, not overwriting) {}".format(out_file)
    assert skip_str in caplog.text
def test_crunching(tmpdir, caplog, test_data_knmi_dir, test_data_marble_cmip5_dir):
    """Crunch the MarbleCMIP5 tas test data and verify the outputs.

    Checks the log file, stderr logging, the output-tracker jsonl file and
    compares every crunched global-mean timeseries against KNMI reference
    data where a matching reference file exists.
    """
    INPUT_DIR = test_data_marble_cmip5_dir
    OUTPUT_DIR = str(tmpdir)
    VAR_TO_CRUNCH = ".*tas.*"
    crunch_contact = "knmi-verification"

    runner = CliRunner(mix_stderr=False)
    with caplog.at_level("DEBUG"):
        result = runner.invoke(
            crunch_data,
            [
                INPUT_DIR,
                OUTPUT_DIR,
                crunch_contact,
                "--drs",
                "MarbleCMIP5",
                "--regexp",
                VAR_TO_CRUNCH,
                "-f",
                "--small-number-workers",
                1,
            ],
        )
    assert result.exit_code == 0
    assert "netcdf-scm: {}".format(netcdf_scm.__version__) in caplog.messages
    assert (
        "Making output directory: {}/netcdf-scm-crunched".format(OUTPUT_DIR)
        in caplog.messages
    )

    # Check that there is a log file which contains 'INFO' log messages
    log_fnames = glob(join(OUTPUT_DIR, "netcdf-scm-crunched", "*.log"))
    assert len(log_fnames) == 1
    with open(log_fnames[0]) as fh:
        log_file = fh.read()
    assert "DEBUG" in log_file

    # Check that the logs are also written to stderr
    assert "DEBUG" not in result.stderr
    assert "INFO" in result.stderr

    # Check the output_tracker file
    with open(
        join(OUTPUT_DIR, "netcdf-scm-crunched", "netcdf-scm_crunched.jsonl")
    ) as fh:
        lines = fh.readlines()
    assert len(lines) == 6

    # check that CanESM2 has areacella file
    # fix: initialise before the loop so a missing entry gives a clean
    # assertion failure rather than a NameError
    checked_metadata = False
    for l in lines:
        d = json.loads(l)
        if "tas_Amon_CanESM2_1pctCO2_r1i1p1_189201-190312.nc" in d["files"][0]:
            checked_metadata = True
            assert len(d["metadata"]["areacella"]["files"]) == 1
            assert len(d["metadata"]["sftlf"]["files"]) == 1
    assert checked_metadata

    THRESHOLD_PERCENTAGE_DIFF = 10 ** -1
    files_found = 0
    for dirpath, dirnames, filenames in walk(OUTPUT_DIR):
        if not dirnames:
            # leaf directories contain exactly one crunched file each
            assert len(filenames) == 1
            filename = filenames[0]
            files_found += 1

            knmi_data_name = "global_{}.dat".format(
                "_".join(filename.split("_")[1:6])
            )
            knmi_data_path = join(test_data_knmi_dir, knmi_data_name)
            if not isfile(knmi_data_path):
                print("No data available for {}".format(knmi_data_path))
                continue

            # KNMI files: one row per year, one column per month
            knmi_data = pd.read_csv(
                knmi_data_path,
                skiprows=3,
                delim_whitespace=True,
                header=None,
                names=["year", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            ).melt(id_vars="year", var_name="month")
            knmi_data["year"] = knmi_data["year"].astype(int)
            knmi_data["month"] = knmi_data["month"].astype(int)
            knmi_data = knmi_data.set_index(["year", "month"])

            crunched_data = load_scmdataframe(join(dirpath, filename))
            assert crunched_data.metadata["crunch_contact"] == crunch_contact

            # reshape the World timeseries onto the same (year, month) index
            comparison_data = (
                crunched_data.filter(region="World")
                .timeseries()
                .stack()
                .to_frame()
                .reset_index()[["time", 0]]
            )
            comparison_data = comparison_data.rename({0: "value"}, axis="columns")
            comparison_data["year"] = comparison_data["time"].apply(lambda x: x.year)
            comparison_data["month"] = comparison_data["time"].apply(
                lambda x: x.month
            )
            comparison_data = comparison_data.drop("time", axis="columns")
            comparison_data = comparison_data.set_index(["year", "month"])

            rel_difference = (knmi_data - comparison_data) / knmi_data
            # drop regions where times are not equal
            rel_difference = rel_difference.dropna()
            assert not rel_difference.empty, "not testing anything"

            assert_message = "{} data is not the same to within {}%".format(
                filename, THRESHOLD_PERCENTAGE_DIFF
            )
            all_close = (
                np.abs(rel_difference.values) < THRESHOLD_PERCENTAGE_DIFF / 100
            ).all()
            assert all_close, assert_message

            print(
                "{} file matches KNMI data to within {}%".format(
                    filename, THRESHOLD_PERCENTAGE_DIFF
                )
            )

    assert files_found == 6