示例#1
0
def read(dictArgs):
    """ read data from model and obs files, process data and return it """

    dsmodel = xr.open_mfdataset(dictArgs["infile"],
                                combine="by_coords",
                                decode_times=False)

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(dictArgs["obsfile"],
                                  combine="by_coords",
                                  decode_times=False)
    else:
        # use dataset from catalog, either from command line or default
        cat_platform = "catalogs/obs_catalog_" + dictArgs["platform"] + ".yml"
        catfile = pkgr.resource_filename("om4labs", cat_platform)
        cat = intake.open_catalog(catfile)
        dsobs = cat[dictArgs["dataset"]].to_dask()

    # read in model and obs data
    datamodel = read_data(dsmodel, dictArgs["possible_variable_names"])
    dataobs = read_data(dsobs, dictArgs["possible_variable_names"])

    # subset data
    if dictArgs["depth"] is None:
        dictArgs["depth"] = dictArgs["surface_default_depth"]

    if dictArgs["depth"] is not None:
        datamodel = subset_data(datamodel, "assigned_depth", dictArgs["depth"])
        dataobs = subset_data(dataobs, "assigned_depth", dictArgs["depth"])

    # reduce data along depth (not yet implemented)
    if "depth_reduce" in dictArgs:
        if dictArgs["depth_reduce"] == "mean":
            # do mean
            pass
        elif dictArgs["depth_reduce"] == "sum":
            # do sum
            pass

    # reduce data along time, here mandatory
    if ("assigned_time"
            in datamodel.dims) and (len(datamodel["assigned_time"]) > 1):
        warnings.warn("input dataset has more than one time record, " +
                      "performing non-weighted average")
        datamodel = simple_average(datamodel, "assigned_time")
    if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1:
        warnings.warn("reference dataset has more than one time record, " +
                      "performing non-weighted average")
        dataobs = simple_average(dataobs, "assigned_time")

    datamodel = datamodel.squeeze()
    dataobs = dataobs.squeeze()

    # check final data is 2d
    assert len(datamodel.dims) == 2
    assert len(dataobs.dims) == 2

    # check consistency of coordinates
    assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"])
    assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"])

    # homogeneize coords
    dataobs = copy_coordinates(datamodel, dataobs,
                               ["assigned_lon", "assigned_lat"])

    # restrict model to where obs exists
    datamodel = datamodel.where(dataobs)

    # dump values
    model = datamodel.to_masked_array()
    obs = dataobs.to_masked_array()
    x = datamodel["assigned_lon"].values
    y = datamodel["assigned_lat"].values

    # compute area
    if "areacello" in dsmodel.variables:
        area = dsmodel["areacello"].values
    else:
        if model.shape == (180, 360):
            area = compute_area_regular_grid(dsmodel)
        else:
            raise IOError("no cell area provided")

    return x, y, area, model, obs
def read(dictArgs):
    """ read data from model and obs files, process data and return it """

    if dictArgs["config"] is not None:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], dictArgs["config"])
        ds_static = cat["ocean_static_1x1"].to_dask()

    if dictArgs["static"] is not None:
        ds_static = xr.open_dataset(dictArgs["static"])

    # Compute basin codes
    codes = generate_basin_codes(ds_static, lon="lon", lat="lat")
    codes = np.array(codes)

    # depth coordinate
    if "deptho" in list(ds_static.variables):
        depth = ds_static.deptho.to_masked_array()
    elif "depth" in list(ds_static.variables):
        depth = ds_static.depth.to_masked_array()
    else:
        raise ValueError("Unable to find depth field.")
    depth = np.where(np.isnan(depth), 0.0, depth)
    depth = depth * -1.0

    dsmodel = xr.open_mfdataset(dictArgs["infile"],
                                combine="by_coords",
                                use_cftime=True)

    if dictArgs["obsfile"] is not None:
        # priority to user-provided obs file
        dsobs = xr.open_mfdataset(dictArgs["obsfile"],
                                  combine="by_coords",
                                  decode_times=False)
    else:
        # use dataset from catalog, either from command line or default
        cat = open_intake_catalog(dictArgs["platform"], "obs")
        dsobs = cat[dictArgs["dataset"]].to_dask()

    # read in model and obs data
    datamodel = read_data(dsmodel, dictArgs["possible_variable_names"])
    dataobs = read_data(dsobs, dictArgs["possible_variable_names"])

    # reduce data along time, here mandatory
    if ("assigned_time"
            in datamodel.dims) and (len(datamodel["assigned_time"]) > 1):
        warnings.warn("input dataset has more than one time record, " +
                      "performing non-weighted average")
        datamodel = simple_average(datamodel, "assigned_time")
    if ("assigned_time" in dataobs.dims) and len(dataobs["assigned_time"]) > 1:
        warnings.warn("reference dataset has more than one time record, " +
                      "performing non-weighted average")
        dataobs = simple_average(dataobs, "assigned_time")

    datamodel = datamodel.squeeze()
    dataobs = dataobs.squeeze()

    # check final data is 3d
    assert len(datamodel.dims) == 3
    assert len(dataobs.dims) == 3

    # check consistency of coordinates
    assert np.allclose(datamodel["assigned_lon"], dataobs["assigned_lon"])
    assert np.allclose(datamodel["assigned_lat"], dataobs["assigned_lat"])

    # homogeneize coords
    dataobs = copy_coordinates(datamodel, dataobs,
                               ["assigned_lon", "assigned_lat"])

    # restrict model to where obs exists
    datamodel = datamodel.where(dataobs)

    # dump values
    model = datamodel.to_masked_array()
    obs = dataobs.to_masked_array()
    x = datamodel["assigned_lon"].values
    y = datamodel["assigned_lat"].values
    z = datamodel["assigned_depth"].values

    # convert z to negative values
    z = z * -1

    # compute area
    if "areacello" in dsmodel.variables:
        area = dsmodel["areacello"].values
    else:
        if (model.shape[-2], model.shape[-1]) == (180, 360):
            area = compute_area_regular_grid(dsmodel)
        else:
            raise IOError("no cell area provided")

    # date range
    dates = date_range(dsmodel)

    return y, z, depth, area, codes, model, obs, dates
示例#3
0
def test_copy_coordinates():
    from om4labs.om4common import copy_coordinates

    da = xr.DataArray(data=np.empty((360, 2)), dims=("lon", "bnds"))
    da = copy_coordinates(grid_1x1["lon_bnds"], da, ["lon", "bnds"])
    assert list(da.coords).sort() == list(grid_1x1["lon_bnds"].coords).sort()