# Imports needed by the functions in this section; the helper routines
# (read_data, subset_data, simple_average, copy_coordinates,
# compute_area_regular_grid, open_intake_catalog, generate_basin_codes,
# date_range) are assumed to live in om4labs.om4common.
import warnings

import intake
import numpy as np
import pkg_resources as pkgr
import xarray as xr

from om4labs.om4common import (
    compute_area_regular_grid,
    copy_coordinates,
    date_range,
    generate_basin_codes,
    open_intake_catalog,
    read_data,
    simple_average,
    subset_data,
)


def read(dictArgs):
    """Read data from model and obs files, process data, and return it."""

    dsmodel = xr.open_mfdataset(
        dictArgs["infile"], combine="by_coords", decode_times=False
    )

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(
            dictArgs["obsfile"], combine="by_coords", decode_times=False
        )
    else:
        # use dataset from catalog, either from command line or default
        cat_platform = "catalogs/obs_catalog_" + dictArgs["platform"] + ".yml"
        catfile = pkgr.resource_filename("om4labs", cat_platform)
        cat = intake.open_catalog(catfile)
        dsobs = cat[dictArgs["dataset"]].to_dask()

    # read in model and obs data
    datamodel = read_data(dsmodel, dictArgs["possible_variable_names"])
    dataobs = read_data(dsobs, dictArgs["possible_variable_names"])

    # subset data along depth, falling back to the default surface level
    if dictArgs["depth"] is None:
        dictArgs["depth"] = dictArgs["surface_default_depth"]

    if dictArgs["depth"] is not None:
        datamodel = subset_data(datamodel, "assigned_depth", dictArgs["depth"])
        dataobs = subset_data(dataobs, "assigned_depth", dictArgs["depth"])

    # reduce data along depth (not yet implemented)
    if "depth_reduce" in dictArgs:
        if dictArgs["depth_reduce"] == "mean":
            # do mean
            pass
        elif dictArgs["depth_reduce"] == "sum":
            # do sum
            pass

    # reduce data along time, here mandatory
    if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1):
        warnings.warn(
            "input dataset has more than one time record, "
            "performing non-weighted average"
        )
        datamodel = simple_average(datamodel, "assigned_time")
    if ("assigned_time" in dataobs.dims) and (len(dataobs["assigned_time"]) > 1):
        warnings.warn(
            "reference dataset has more than one time record, "
            "performing non-weighted average"
        )
        dataobs = simple_average(dataobs, "assigned_time")

    datamodel = datamodel.squeeze()
    dataobs = dataobs.squeeze()

    # check final data is 2d
    assert len(datamodel.dims) == 2
    assert len(dataobs.dims) == 2

    # check consistency of coordinates
    assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"])
    assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"])

    # homogenize coords
    dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"])

    # restrict model to where obs exists
    datamodel = datamodel.where(dataobs)

    # dump values
    model = datamodel.to_masked_array()
    obs = dataobs.to_masked_array()
    x = datamodel["assigned_lon"].values
    y = datamodel["assigned_lat"].values

    # compute area, falling back to a regular-grid estimate for 1x1 data
    if "areacello" in dsmodel.variables:
        area = dsmodel["areacello"].values
    else:
        if model.shape == (180, 360):
            area = compute_area_regular_grid(dsmodel)
        else:
            raise IOError("no cell area provided")

    return x, y, area, model, obs
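
# --- Usage sketch (illustrative, not part of om4labs) --------------------
# A minimal example of how the surface `read` above might be driven. The
# file names, catalog entry, and variable names below are hypothetical
# placeholders; in om4labs this dictionary is built by the diagnostic's
# argument parser.
def _example_read_surface():
    dictArgs = {
        "infile": "ocean_monthly.nc",          # hypothetical model file
        "obsfile": None,                        # fall back to the obs catalog
        "platform": "gfdl",                     # selects catalogs/obs_catalog_gfdl.yml
        "dataset": "WOA13_annual_TS",           # hypothetical catalog entry
        "possible_variable_names": ["tos", "sst"],
        "depth": None,
        "surface_default_depth": 0.0,
    }
    return read(dictArgs)
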
def read(dictArgs):
    """Read data from model and obs files, process data, and return it."""

    # static dataset: a user-provided file takes precedence over the catalog
    ds_static = None
    if dictArgs["config"] is not None:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], dictArgs["config"])
        ds_static = cat["ocean_static_1x1"].to_dask()
    if dictArgs["static"] is not None:
        ds_static = xr.open_dataset(dictArgs["static"])
    if ds_static is None:
        # fail early rather than let ds_static be undefined below
        raise ValueError("either a config or a static file must be provided")

    # compute basin codes
    codes = generate_basin_codes(ds_static, lon="lon", lat="lat")
    codes = np.array(codes)

    # depth coordinate
    if "deptho" in list(ds_static.variables):
        depth = ds_static.deptho.to_masked_array()
    elif "depth" in list(ds_static.variables):
        depth = ds_static.depth.to_masked_array()
    else:
        raise ValueError("Unable to find depth field.")
    depth = np.where(np.isnan(depth), 0.0, depth)
    # flip sign so depth is negative downward
    depth = depth * -1.0

    dsmodel = xr.open_mfdataset(
        dictArgs["infile"], combine="by_coords", use_cftime=True
    )

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(
            dictArgs["obsfile"], combine="by_coords", decode_times=False
        )
    else:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], "obs")
        dsobs = cat[dictArgs["dataset"]].to_dask()

    # read in model and obs data
    datamodel = read_data(dsmodel, dictArgs["possible_variable_names"])
    dataobs = read_data(dsobs, dictArgs["possible_variable_names"])

    # reduce data along time, here mandatory
    if ("assigned_time" in datamodel.dims) and (len(datamodel["assigned_time"]) > 1):
        warnings.warn(
            "input dataset has more than one time record, "
            "performing non-weighted average"
        )
        datamodel = simple_average(datamodel, "assigned_time")
    if ("assigned_time" in dataobs.dims) and (len(dataobs["assigned_time"]) > 1):
        warnings.warn(
            "reference dataset has more than one time record, "
            "performing non-weighted average"
        )
        dataobs = simple_average(dataobs, "assigned_time")

    datamodel = datamodel.squeeze()
    dataobs = dataobs.squeeze()

    # check final data is 3d
    assert len(datamodel.dims) == 3
    assert len(dataobs.dims) == 3

    # check consistency of coordinates
    assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"])
    assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"])

    # homogenize coords
    dataobs = copy_coordinates(datamodel, dataobs, ["assigned_lon", "assigned_lat"])

    # restrict model to where obs exists
    datamodel = datamodel.where(dataobs)

    # dump values
    model = datamodel.to_masked_array()
    obs = dataobs.to_masked_array()
    y = datamodel["assigned_lat"].values
    z = datamodel["assigned_depth"].values

    # convert z to negative values
    z = z * -1

    # compute area, falling back to a regular-grid estimate for 1x1 data
    if "areacello" in dsmodel.variables:
        area = dsmodel["areacello"].values
    else:
        if (model.shape[-2], model.shape[-1]) == (180, 360):
            area = compute_area_regular_grid(dsmodel)
        else:
            raise IOError("no cell area provided")

    # date range
    dates = date_range(dsmodel)

    return y, z, depth, area, codes, model, obs, dates
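
# --- Sketch: cell areas on a regular 1x1 degree grid ---------------------
# Both `read` functions above fall back to compute_area_regular_grid (from
# om4labs.om4common) when the model file carries no `areacello` variable.
# A minimal stand-in, assuming a regular latitude-longitude grid and
# deriving areas from the grid shape alone (the real helper presumably
# works from the dataset's coordinate bounds), is:
#   area = R^2 * dlon * (sin(lat_north) - sin(lat_south))
def _example_area_regular_grid(nlat=180, nlon=360, radius=6.371e6):
    """Approximate cell areas (m^2) for a regular nlat x nlon global grid."""
    lat_edges = np.deg2rad(np.linspace(-90.0, 90.0, nlat + 1))
    dlon = 2.0 * np.pi / nlon
    # area of one cell in each latitude band; summing all cells recovers
    # the full sphere area 4 * pi * R^2
    band = radius ** 2 * dlon * np.diff(np.sin(lat_edges))
    return np.broadcast_to(band[:, None], (nlat, nlon)).copy()
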
def test_copy_coordinates():
    from om4labs.om4common import copy_coordinates

    # grid_1x1 is assumed to be a reference dataset defined at module
    # scope in the test file (e.g. a 1x1 degree static grid)
    da = xr.DataArray(data=np.empty((360, 2)), dims=("lon", "bnds"))
    da = copy_coordinates(grid_1x1["lon_bnds"], da, ["lon", "bnds"])
    # compare sorted coordinate names; list.sort() returns None, so it
    # cannot be used inline in the assertion
    assert sorted(da.coords) == sorted(grid_1x1["lon_bnds"].coords)
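
# --- Sketch: what copy_coordinates is expected to do ----------------------
# For reference, a minimal reimplementation consistent with the test above,
# assuming the helper copies the named coordinates from `da_source` onto
# `da_target` when present. The real implementation lives in
# om4labs.om4common and may differ in detail.
def _example_copy_coordinates(da_source, da_target, coords):
    for coord in coords:
        if coord in da_source.coords:
            da_target = da_target.assign_coords({coord: da_source[coord]})
    return da_target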