def test_decomp_sumcomp(adv, avg_dims_error=None, thresh=0.9999999999, loc=None, iloc=None,
                        plot=True, **plot_kws):
    """Check that "mean" + "trb_r" reproduces the total advective tendency "adv_r".

    The comparison is carried out separately for every budget method (ID)
    contained in `adv`, for all three spatial directions at once.
    The test fails if the Nash-Sutcliffe efficiency coefficient (NSE) of any
    ID is below the given threshold. If avg_dims_error is given, the averaging
    in the NSE calculation is only carried out over these dimensions.
    Afterwards the minimum NSE value is taken over the remaining dimensions.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed for at least one ID.
    err : float
        Test statistic NSE (smallest value over all IDs).

    """
    nse_per_id = []
    failed = False
    for method in adv.ID.values:
        # reference: total tendency; data: sum of its two components
        total = adv.sel(ID=method, comp="adv_r")
        summed = adv.sel(ID=method, comp=["mean", "trb_r"]).sum("comp")
        summed = tools.loc_data(summed, loc=loc, iloc=iloc)
        total = tools.loc_data(total, loc=loc, iloc=iloc)

        nse = tools.nse(summed, total, dim=avg_dims_error).min().values
        nse_per_id.append(nse)
        if not nse < thresh:
            continue

        failed = True
        log = "decomp_sumcomp, {} (XYZ) for ID={}: min. NSE less than {}: {:.11f}".format(
            summed.description, method, thresh, nse)
        print(log)
        if plot:
            total.name = "adv_r"
            summed.name = "mean + trb_r"
            plotting.scatter_hue(summed, total, title=log, **plot_kws)

    return failed, min(nse_per_id)
def test_2nd(adv, avg_dims_error=None, thresh=0.999, loc=None, iloc=None, plot=True, **plot_kws):
    """Compare the budget methods "cartesian 2nd" and "cartesian".

    Test that the advective tendencies resulting from 2nd-order and correct
    advection order are equal in all three directions and components
    (usually carried out if correct order is equal to 2nd order).

    The test fails if the Nash-Sutcliffe efficiency coefficient (NSE)
    is below the given threshold. If avg_dims_error is given, the averaging
    in the NSE calculation is only carried out over these dimensions.
    Afterwards the minimum NSE value is taken over the remaining dimensions.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic NSE

    """
    second_order = tools.loc_data(adv.sel(ID="cartesian 2nd"), loc=loc, iloc=iloc)
    correct_order = tools.loc_data(adv.sel(ID="cartesian"), loc=loc, iloc=iloc)
    err = tools.nse(second_order, correct_order, dim=avg_dims_error).min().values

    if not err < thresh:
        return False, err

    log = "test_2nd, {} (XYZ): min. NSE less than {}: {:.5f}".format(
        second_order.description, thresh, err)
    print(log)
    if plot:
        correct_order.name = "correct order"
        second_order.name = "2nd order"
        plotting.scatter_hue(second_order, correct_order, title=log, **plot_kws)
    return True, err
def test_decomp_sumdir(adv, corr, avg_dims_error=None, thresh=0.99999, loc=None, iloc=None,
                       plot=True, **plot_kws):
    """Compare direction-summed tendencies of the methods "native" and "cartesian".

    Test that budget methods "native" and "cartesian" give equal advective
    tendencies in all components if the three spatial directions are summed
    up. Before the comparison, the correction `corr` for ID "cartesian" and
    direction "T" is subtracted from the "cartesian" tendency.

    The test fails if the coefficient of determination
    is below the given threshold. If avg_dims_error is given, the averaging
    in the R2 calculation is only carried out over these dimensions.
    Afterwards the minimum R2 value is taken over the remaining dimensions.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    corr : xarray DataArray
        Cartesian corrections for advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2

    """
    # only the components for which corrections exist are compared
    summed = adv.sel(dir="sum", comp=corr.comp)
    native = summed.sel(ID="native")
    cartesian = summed.sel(ID="cartesian") - corr.sel(ID="cartesian", dir="T")
    cartesian = tools.loc_data(cartesian, loc=loc, iloc=iloc)
    native = tools.loc_data(native, loc=loc, iloc=iloc)

    err = R2(cartesian, native, dim=avg_dims_error).min().values
    if not err < thresh:
        return False, err

    log = "test_decomp_sumdir, {}: min. R2 less than {}: {:.7f}".format(
        cartesian.description, thresh, err)
    print(log)
    if plot:
        cartesian.name = "cartesian"
        native.name = "native"
        plotting.scatter_hue(cartesian, native, title=log, **plot_kws)
    return True, err
def test_dz_out(adv, avg_dims_error=None, thresh=0.95, loc=None, iloc=None, plot=True,
                **plot_kws):
    """Compare the budget methods "cartesian dz_out_z" and "cartesian".

    Test that the Cartesian corrections imposed by the budget methods
    "cartesian" and "cartesian dz_out_z" lead to similar advective tendencies
    in all three directions and components.

    The test fails if the coefficient of determination
    is below the given threshold. If avg_dims_error is given, the averaging
    in the R2 calculation is only carried out over these dimensions.
    Afterwards the minimum R2 value is taken over the remaining dimensions.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2

    """
    dz_out = tools.loc_data(adv.sel(ID="cartesian dz_out_z"), loc=loc, iloc=iloc)
    reference = tools.loc_data(adv.sel(ID="cartesian"), loc=loc, iloc=iloc)
    err = R2(dz_out, reference, dim=avg_dims_error).min().values

    failed = bool(err < thresh)
    if failed:
        log = "test_dz_out, {} (XYZ): min. R2 less than {}: {:.5f}".format(
            dz_out.description, thresh, err)
        print(log)
        if plot:
            dz_out.name = "dz_out_z"
            reference.name = "reference corr."
            plotting.scatter_hue(dz_out, reference, title=log, **plot_kws)
    return failed, err
def test_w(dat_inst, avg_dims_error=None, thresh=0.995, loc=None, iloc=None, plot=True,
           **plot_kws):
    """Compare the variables "W_DIAG" and "W" of the instantaneous output.

    Test that the instantaneous vertical velocity is very similar to the
    instantaneous diagnosed vertical velocity used in the tendency
    calculations.

    The test fails if the Nash-Sutcliffe efficiency coefficient (NSE)
    is below the given threshold. If avg_dims_error is given, the averaging
    in the NSE calculation is only carried out over these dimensions.
    Afterwards the minimum NSE value is taken over the remaining dimensions.

    Parameters
    ----------
    dat_inst : xarray Dataset
        Instantaneous output; must contain the variables "W" (reference)
        and "W_DIAG" (tested data).
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic NSE

    """
    # index the whole dataset once, then pick both velocity fields from it
    subset = tools.loc_data(dat_inst, loc=loc, iloc=iloc)
    w_ref, w_diag = subset["W"], subset["W_DIAG"]
    err = tools.nse(w_diag, w_ref, dim=avg_dims_error).min().values

    if not err < thresh:
        return False, err

    log = "test_w: min. NSE less than {}: {:.5f}".format(thresh, err)
    print(log)
    if plot:
        plotting.scatter_hue(w_diag, w_ref, title=log, **plot_kws)
    return True, err
def scatter_hue(dat, ref, plot_diff=False, hue="bottom_top", ignore_missing_hue=True,
                discrete=True, cmap="gnuplot", iloc=None, loc=None, savefig=False, fname=None,
                figloc=None, close=False, title=None, **kwargs):
    """Scatter plot of dat vs. ref with coloring based on hue variable.

    Parameters
    ----------
    dat : xarray DataArray
        Data plotted on y-axis.
    ref : xarray DataArray
        Reference data plotted on x-axis.
    plot_diff : bool, optional
        Plot the difference dat-ref against ref. The default is False.
    hue : str, optional
        Hue variable. All xarray dimensions are allowed. "_stag" is
        automatically appended, if necessary. The default is "bottom_top".
    ignore_missing_hue : bool, optional
        If hue variable is not available, use default instead of raising
        an exception. The default is True.
    discrete : bool, optional
        Use discrete colorbar with labels for all coordinate values.
        Forced to True if the hue coordinate cannot be converted to int.
        The default is True.
    cmap : str, optional
        Name of the colormap for the plot. The default is 'gnuplot'.
    loc : dict, optional
        Mapping for label based indexing before plotting. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before plotting.
        The default is None.
    savefig : bool, optional
        Save figure to disk. The default is False.
    fname : str, optional
        File name of plot if savefig=True. If saving under this name raises
        a ValueError, ".png" is appended. The default is None, for which
        "scatter" is used.
    figloc : str or path-like, optional
        Directory to save plot in (created if necessary).
        Defaults to the directory of this script.
    close : bool, optional
        Close the figure after creation. The default is False.
    title : str, optional
        Title of the plot. The default is None.
    **kwargs :
        keyword arguments passed to plt.scatter. "cmap" and "s" (marker size,
        10) are only set if not given.

    Returns
    -------
    fig : matplotlib figure
    ax : matplotlib axes
    cax : matplotlib axes
        Colorbar axes.

    """
    dat = tools.loc_data(dat, loc=loc, iloc=iloc)
    ref = tools.loc_data(ref, loc=loc, iloc=iloc)
    # index 0: data (y-axis), index 1: reference (x-axis)
    pdat = xr.concat([dat, ref], "concat_dim")

    if plot_diff:
        pdat[0] = dat - ref

    if ignore_missing_hue:
        if (hue not in pdat.dims) and (hue + "_stag" not in pdat.dims):
            hue = "bottom_top"

    if (hue not in pdat.dims) and (hue + "_stag" in pdat.dims):
        hue = hue + "_stag"

    # create integer hue variable to allow non-numeric hue variables
    n_hue = len(pdat[hue])
    hue_int = np.arange(n_hue)
    pdat = pdat.assign_coords(hue=(hue, hue_int))
    pdatf = pdat[0].stack(s=pdat[0].dims)

    # set color
    if ("bottom_top" in hue) and (not discrete):
        # negative sign: the colorbar is relabelled with positive values below
        color = -pdatf[hue]
    elif (hue == "Time") and (not discrete):
        # use integer hue variable to prevent error
        color = pdatf["hue"]
    else:
        color = pdatf[hue]
        try:
            color.astype(int)  # check if hue is numeric
        except ValueError:
            discrete = True
    if discrete:
        cmap = plt.get_cmap(cmap, n_hue)
        # use integer hue variable to prevent error
        color = pdatf["hue"]

    kwargs.setdefault("cmap", cmap)

    fig, ax = plt.subplots()
    kwargs.setdefault("s", 10)
    p = plt.scatter(pdat[1], pdat[0], c=color.values, **kwargs)

    # set x and y labels: prefer the name, fall back to the description
    labels = []
    for d in [ref, dat]:
        label = ""
        if d.name is not None:
            label = d.name
        elif "description" in d.attrs:
            label = d.description
        labels.append(label)
    if plot_diff and (labels[0] != "") and (labels[1] != ""):
        labels[1] = "{} - {}".format(labels[1], labels[0])

    # add units
    for i, d in enumerate([ref, dat]):
        if (labels[i] != "") and ("units" in d.attrs):
            labels[i] += " ({})".format(d.units)
    plt.xlabel(labels[0])
    plt.ylabel(labels[1])

    # mask all points for which data or reference is missing
    for i in [0, 1]:
        pdat = pdat.where(~pdat[i].isnull())

    # set axis limits equal for x and y axis
    if not plot_diff:
        minmax = [pdat.min(), pdat.max()]
        # take full range of data increased by 3%
        dist = minmax[1] - minmax[0]
        minmax[0] -= 0.03 * dist
        minmax[1] += 0.03 * dist
        plt.plot(minmax, minmax, c="gray", label="1:1")
        plt.legend()
        ax.set_xlim(minmax)
        ax.set_ylim(minmax)

    # colorbar
    cax = fig.add_axes([0.92, 0.125, 0.05, .75], frameon=True)
    cax.set_yticks([])
    cax.set_xticks([])
    clabel = hue
    if "bottom_top" in hue:
        # raw string: "\e" is not a valid escape sequence
        clabel = r"$\eta$"
    cb = plt.colorbar(p, cax=cax, label=clabel)
    if discrete:
        # set ticks for all hue values
        if n_hue > 1:
            d = (n_hue - 1) / n_hue
            cb.set_ticks(np.arange(d / 2, n_hue - 1, d))
        else:
            cb.set_ticks([0])
        cb.set_ticklabels(pdat[hue].values)
    elif "bottom_top" in hue:
        # highest value must be at bottom: colors are the negative hue values.
        # Derive the tick positions from the labels so that both always have
        # the same length (np.arange with a float step only includes the last
        # tick because of round-off).
        tick_labels = np.linspace(0.8, 0.2, 4)
        cb.set_ticks(-tick_labels)
        cb.set_ticklabels(tick_labels.round(1))

    # labels for error stats
    err = abs(dat - ref)
    rmse = (err**2).mean().values**0.5
    r = testing.R2(dat, ref)
    ax.text(0.74, 0.07, "RMSE={0:.2E}\nR$^2$={1:.7f}".format(rmse, r.values),
            horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)
    if title is not None:
        fig.suptitle(title)

    if savefig:
        if figloc is None:
            figloc = Path(__file__).parent
        else:
            os.makedirs(figloc, exist_ok=True)
        if fname is None:
            fname = "scatter"
        fpath = Path(figloc) / fname

        try:
            fig.savefig(fpath, dpi=300, bbox_inches="tight")
        except ValueError:
            # retry with an explicit png extension
            fig.savefig(str(fpath) + ".png", dpi=300, bbox_inches="tight")
    plt.show(block=False)
    if close:
        # close this figure explicitly instead of whatever figure is current
        plt.close(fig)

    return fig, ax, cax
def run_tests(datout, tests, dat_mean=None, dat_inst=None, sim_id="", trb_exp=False, hor_avg=False, chunks=None, figloc=None, **kw): """Run test functions for WRF output postprocessed with WRFlux. Thresholds are hard-coded. Parameters ---------- datout : nested dict Postprocessed output for all variables. tests : list of str Tests to perform. Choices: testing.all_tests dat_mean : xarray Dataset WRF time-averaged output. dat_inst : xarray DataArray, optional WRF instantaneous output needed for w test. The default is None. sim_id : str, optional ID of the current test simulation. The default is "". trb_exp : bool, optional Turbulent fluxes were calculated explicitly. The default is False. hor_avg : bool, optional Horizontal averaging was used in postprocessing. The default is False. chunks : dict of integers, optional Mapping from dimension "x" and/or "y" to chunk sizes used in postprocessing. If given, the boundaries in the chunking directions are pruned. The default is None. figloc : str or path-like, optional Directory to save plot in. Defaults to the parent directory of this script. **kw : Keyword arguments passed to test functions. Returns ------- failed : pandas DataFrame "FAIL" and "pass" labels for all tests and variables. err : pandas DataFrame R2 error statistics for performed tests. """ if tests is None: tests = all_tests else: #drop duplicates tests = list(set(tests)) tests = tests.copy() for test in tests: if test not in all_tests: raise ValueError("Test {} not available! 
Available tests:\n{}".format(test, ", ".join(all_tests))) variables = list(datout.keys()) failed = pd.DataFrame(columns=tests, index=variables) err = pd.DataFrame(columns=tests, index=variables) failed[:] = "" err[:] = "" # cut boundaries for non-periodic BC or if chunking was used attrs = datout[variables[0]]["flux"].attrs iloc = {} if (not attrs["PERIODIC_X"]) or (chunks is not None and "x" in chunks): iloc["x"] = slice(1, -1) if (not attrs["PERIODIC_Y"]) or (chunks is not None and "y" in chunks): iloc["y"] = slice(1, -1) if attrs["PERIODIC_Y"] == 0: if "Y=0" in tests: tests.remove("Y=0") cyclic = {d: bool(attrs["PERIODIC_{}".format(d.upper())]) for d in tools.xy} cyclic["bottom_top"] = False avg_dims = None if hor_avg: avg_dims = [] dat = datout[variables[0]]["tend"]["adv"] for d in tools.xy: if (d not in dat.dims) and (d + "_stag" not in dat.dims): avg_dims.append(d) # for w test: cut first time step if dat_inst is not None: dat_inst_lim = dat_inst.isel(Time=slice(1, None), **iloc) elif ("w" in tests) or ("dim_coords" in tests): raise ValueError("For tests 'w' and 'dim_coords', dat_inst needs to be given!") datout_lim = {} for v, datout_v in datout.items(): datout_lim[v] = {} for n, dat in datout_v.items(): if "ID" in dat.dims: IDs = [] for ID in dat.ID.values: ID = ID.split(" ") IDs.append(" ".join(ID)) dat["ID"] = IDs if "dim_coords" in tests: test_dim_coords(dat, dat_inst, v, n, failed) if hor_avg: for avg_dim in avg_dims: for stag in ["", "_stag"]: assert avg_dim + stag not in dat.dims datout_lim[v][n] = tools.loc_data(dat, iloc=iloc) if figloc is None: fpath = Path(__file__).parent else: fpath = Path(figloc) for var, datout_v in datout_lim.items(): print("Variable: " + var) figloc = fpath / "figures" / var failed_i = {} err_i = {} if dat_mean is not None: dat_mean_v = dat_mean.sel(Time=datout_v["tend"]["Time"]) if "budget" in tests: tend = datout_v["tend"]["net"].sel(side="tendency") forcing = datout_v["tend"]["net"].sel(side="forcing") kw["figloc"] = 
figloc / "budget" if (var == "w") and ("open BC y hor_avg" in sim_id): kw["thresh"] = 0.995 elif (var in ["u", "v", "w"]) and ("open BC" in sim_id): kw["thresh"] = 0.999 elif var == "t": if "open BC" in sim_id: kw["thresh"] = 0.999 if "symmetric BC" in sim_id: kw["thresh"] = 0.995 elif attrs["USE_THETA_M"] == 1: if attrs["OUTPUT_DRY_THETA_FLUXES"] == 0: # lower thresh as cartesian tendency for thm is close to 0 if attrs["MP_PHYSICS"] > 0: kw["thresh_cartesian"] = 0.96 kw["thresh"] = 0.9998 else: kw["thresh_cartesian"] = 0.995 # reduce threshold for WENO and monotonic advection as # dry theta budget is not perfectly closed elif (attrs["SCALAR_ADV_OPT"] >= 3) or (attrs["MOIST_ADV_OPT"] >= 3): kw["thresh"] = 0.88 elif attrs["MOIST_ADV_OPT"] == 2: kw["thresh"] = 0.96 failed_i["budget"], err_i["budget"] = test_budget(tend, forcing, **kw) for thresh in ["thresh", "thresh_cartesian"]: if thresh in kw: del kw[thresh] adv = datout_v["tend"]["adv"] if "decomp_sumdir" in tests: if attrs["HESSELBERG_AVG"] == 0: kw["thresh"] = 0.995 elif trb_exp: kw["thresh"] = 0.999 kw["figloc"] = figloc / "decomp_sumdir" failed_i["decomp_sumdir"], err_i["decomp_sumdir"] = test_decomp_sumdir( adv, datout_v["corr"], **kw) if "thresh" in kw: del kw["thresh"] if "decomp_sumcomp" in tests: if trb_exp: # reduce threshold for explicit turbulent fluxes kw["thresh"] = 0.999 kw["figloc"] = figloc / "decomp_sumcomp" failed_i["decomp_sumcomp"], err_i["decomp_sumcomp"] = test_decomp_sumcomp(adv, **kw) if "thresh" in kw: del kw["thresh"] if ("dz_out" in tests) and (var != "q"): # TODOm: why so bad for q? 
kw["figloc"] = figloc / "dz_out" adv_noavgdir = adv if hor_avg: thresh = {"t": 0.85, "u": 0.7, "v": 0.995, "w": 0.92} kw["thresh"] = thresh[var] adv_noavgdir = adv.sel(dir=[d for d in adv.dir.values if d.lower() not in avg_dims]) failed_i["dz_out"], err_i["dz_out"] = test_dz_out(adv_noavgdir, **kw) if "thresh" in kw: del kw["thresh"] if "adv_2nd" in tests: kw["figloc"] = figloc / "adv_2nd" failed_i["adv_2nd"], err_i["adv_2nd"] = test_2nd(adv, **kw) if ("w" in tests) and (var == variables[-1]): # only do test once: for last variable kw["figloc"] = figloc / "w" failed_i["w"], err_i["w"] = test_w(dat_inst_lim, **kw) if ("mass" in tests) and (var == "t"): if "dz_out" in tests: if hor_avg: kw["thresh"] = 0.85 else: kw["thresh"] = 0.995 elif attrs["HESSELBERG_AVG"] == 0: kw["thresh"] = 0.99998 kw["figloc"] = figloc / "mass" failed_i["mass"], err_i["mass"] = test_mass(datout_v["tend_mass"], **kw) if "thresh" in kw: del kw["thresh"] if "adv_form" in tests: kw["figloc"] = figloc / "adv_form" if var in ["u", "w"]: kw["thresh"] = 0.995 if dat_mean is None: raise ValueError("For adv_form test, dat_mean needs to be given!") failed_i["adv_form"], err_i["adv_form"] = test_adv_form(dat_mean_v, datout_v, var, cyclic, hor_avg=hor_avg, avg_dims=avg_dims, **kw) if "thresh" in kw: del kw["thresh"] if "periodic" in tests: kw["figloc"] = figloc / "mass" failed_i["periodic"] = test_periodic(datout_v, attrs, **kw) if "NaN" in tests: failed_i["NaN"] = test_nan(datout_v) if "sgs" in tests: sgs_sum = datout_v["tend"]["adv"].sel(comp="trb_s").sum("dir") if np.allclose(sgs_sum[0], sgs_sum[1], atol=1e-7, rtol=1e-5): failed_i["sgs"] = False else: failed_i["sgs"] = True if hor_avg and ("Y=0" in tests): failed_i["Y=0"], err_i["Y=0"] = test_y0(adv) # store results for test, f in failed_i.items(): if f: failed.loc[var, test] = "FAIL" else: failed.loc[var, test] = "pass" for test, e in err_i.items(): err.loc[var, test] = e return failed, err
def test_adv_form(dat_mean, datout, var, cyclic=None, hor_avg=False, avg_dims=None,
                  avg_dims_error=None, thresh=0.9995, loc=None, iloc=None, plot=True,
                  **plot_kws):
    """Compare implicit and explicit advective form calculations.

    Explicitly calculate 2nd order mean advection in advective form
    and compare with implicit calculation (ID "cartesian adv_form",
    component "mean"). The test fails if the minimum R2 is below `thresh`.

    Parameters
    ----------
    dat_mean : xarray Dataset
        WRF time-averaged output. The coordinates "bottom_top" and
        "bottom_top_stag" are overwritten in place with the ones of the
        postprocessed fluxes.
    datout : dict
        Postprocessed output for variable var. The entries "tend" (with key
        "adv"), "flux" and "grid" are used.
    var : str
        Variable to process.
    cyclic : dict of booleans for xy or None, optional
        Defines which dimensions have periodic boundary conditions.
        Use periodic boundary conditions to fill lateral boundary points.
        The default is None.
    hor_avg : bool, optional
        Horizontal averaging was used in postprocessing. The default is False.
    avg_dims : str or list of str, optional
        Averaging dimensions if hor_avg=True. The default is None.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2

    """
    adv = datout["tend"]["adv"]
    flux = datout["flux"]
    grid = datout["grid"]

    def _stagger(field, like):
        # bring `field` onto the grid of `like`
        return tools.stagger_like(field, ref=like, cyclic=cyclic, **grid[tools.stagger_const])

    # use the vertical coordinates of the postprocessed output
    for coord in ("bottom_top", "bottom_top_stag"):
        dat_mean[coord] = flux[coord]

    vmean = xr.Dataset({"X": dat_mean["U_MEAN"],
                        "Y": dat_mean["V_MEAN"],
                        "Z": dat_mean["WD_MEAN"]})
    v = "ZWIND" if var == "w" else var.upper()
    var_mean = dat_mean[v + "_MEAN"]
    if hor_avg:
        var_mean = tools.avg_xy(var_mean, avg_dims, cyclic=cyclic)

    vmean_c = xr.Dataset()
    dd = xr.Dataset()
    grad = xr.Dataset()
    tend = xr.Dataset()
    for dim in tools.XYZ:
        if hor_avg:
            vmean[dim] = tools.avg_xy(vmean[dim], avg_dims, cyclic=cyclic,
                                      **grid[tools.stagger_const])
        base = "bottom_top" if dim == "Z" else dim.lower()
        cyc = cyclic[base]
        # d: dimension of var_mean along which is differentiated,
        # ds: dimension with the opposite staggering (target coordinate)
        if base in var_mean.dims:
            d, ds = base, base + "_stag"
        else:
            d, ds = base + "_stag", base

        # grid spacing
        if dim == "Z":
            dd[dim] = tools.diff(grid["Z_STAG"], d, new_coord=flux[ds], cyclic=cyc)
        else:
            dd[dim] = grid["D" + dim]

        if d in adv.dims:
            grad[dim] = tools.diff(var_mean, d, new_coord=flux[ds], cyclic=cyc) / dd[dim]
            vmean_c[dim] = _stagger(vmean[dim], grad[dim])

    # advection in all available directions on the grid of adv
    for dim in tools.XYZ:
        if dim in grad:
            adv_s = - vmean_c[dim] * grad[dim]
            tend[dim] = _stagger(adv_s, adv)

    # subtract horizontal corrections built from the vertical gradient
    for dim in ["X", "Y"]:
        if dim in grad:
            corr = grid[f"dzdt_{dim.lower()}"]
            corr = grad["Z"]*_stagger(corr, grad["Z"])
            corr = _stagger(corr, adv)
            tend[dim] = tend[dim] - corr

    tend = tend.to_array("dir")
    # fname is passed explicitly to the plotting function below
    fname = plot_kws.pop("fname", None)

    implicit = adv.sel(ID="cartesian adv_form", dir=["X", "Y", "Z"], comp="mean")
    dat = tools.loc_data(implicit, loc=loc, iloc=iloc)
    ref = tools.loc_data(tend, loc=loc, iloc=iloc)
    # only compare the directions for which the explicit calculation exists
    dat = dat.sel(dir=ref.dir)
    if var == "w":
        # skip the lowest staggered level
        bottom_cut = slice(1, None)
        dat = dat.isel(bottom_top_stag=bottom_cut)
        ref = ref.isel(bottom_top_stag=bottom_cut)

    err = R2(dat, ref, dim=avg_dims_error).min().values
    if not err < thresh:
        return False, err

    log = "test_adv_form: mean advective component: min. R2 less than {}: {:.10f}".format(
        thresh, err)
    print(log)
    if plot:
        dat.name = "Implicit calculation"
        ref.name = "Explicit calculation"
        if fname is not None:
            log = fname + "\n" + log
        plotting.scatter_hue(dat, ref, title=log, fname=fname, **plot_kws)
    return True, err
def test_mass(tend_mass, avg_dims_error=None, thresh=0.99999999, loc=None, iloc=None,
              plot=True, **plot_kws):
    """Test closure of continuity equation.

    In the tendency calculations the vertical component of the continuity
    equation is calculated as residual to improve the budget closure which
    leads to automatic closure of the continuity equation.
    This test ensures that this residual calculation does not produce larger
    changes in the vertical component by comparing the residual calculation
    (dir "T" minus "X" minus "Y") with the explicit calculation which uses the
    vertical velocity (dir "Z"). The comparison is done for every ID.
    For the dz_out type formulations, the continuity equation cannot be well
    closed. Therefore, we only compare the individual components with the
    standard Cartesian formulation.

    The test fails if the coefficient of determination
    is below the given threshold. If avg_dims_error is given, the averaging
    in the R2 calculation is only carried out over these dimensions.
    Afterwards the minimum R2 value is taken over the remaining dimensions.

    Parameters
    ----------
    tend_mass : xarray DataArray
        Components of continuity equation.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue. If "fname" is
        given and not None, it is prefixed with the ID for every plot.

    Returns
    -------
    failed : bool
        Test failed for at least one ID.
    err : float
        Test statistic R2 (smallest value over all IDs).

    """
    explicit = tend_mass.sel(dir="Z")
    residual = tend_mass.sel(dir="T") - tend_mass.sel(dir="X") - tend_mass.sel(dir="Y")
    fname = plot_kws.pop("fname", "")

    r2_per_id = []
    failed = False
    for method in residual.ID.values:
        dat_i = tools.loc_data(residual.sel(ID=method), loc=loc, iloc=iloc)
        ref_i = tools.loc_data(explicit.sel(ID=method), loc=loc, iloc=iloc)
        r2 = R2(dat_i, ref_i, dim=avg_dims_error).min().values
        r2_per_id.append(r2)
        if not r2 < thresh:
            continue

        failed = True
        log = "test_mass: vertical component of continuity equation\n for ID={}: min. R2 less than {}: {:.10f}".format(method, thresh, r2)
        print(log)
        if plot:
            dat_i.name = "Residual calculation"
            ref_i.name = "Calculation with vertical velocity"
            fname_i = fname
            if fname is not None:
                # one file per ID; the file name also goes into the title
                fname_i = "ID=" + method + "_" + fname
                log = fname_i + "\n" + log
            plotting.scatter_hue(dat_i, ref_i, title=log, fname=fname_i, **plot_kws)

    return failed, min(r2_per_id)
def test_budget(tend, forcing, avg_dims_error=None, thresh=0.9999, thresh_cartesian=None,
                budget_methods=("native", "adv_form", "cartesian", "cartesian adv_form"),
                loc=None, iloc=None, plot=True, **plot_kws):
    """Test closure of budget: tend = forcing.

    The test fails if the coefficient of determination
    is below the given threshold. If avg_dims_error is given, the averaging
    in the R2 calculation is only carried out over these dimensions.
    Afterwards the minimum R2 value is taken over the remaining dimensions.

    Parameters
    ----------
    tend : xarray DataArray
        Total tendency.
    forcing : xarray DataArray
        Total forcing.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails.
    thresh_cartesian : float, optional
        Use different threshold value for the budget method "cartesian".
        The default is None, for which 'thresh' is used for all methods.
    budget_methods : list of str
        Budget methods to consider. Methods that are not contained in tend.ID
        are skipped. By default, only "native", "adv_form", "cartesian", and
        "cartesian adv_form" are tested.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue. If "fname" is
        given and not None, it is prefixed with the ID for every plot.

    Raises
    ------
    ValueError
        If none of the budget methods is available in tend.ID.

    Returns
    -------
    failed : bool
        Test failed for at least one budget method.
    err : float
        Test statistic R2 (smallest value over all tested budget methods).

    """
    failed = False
    err = []
    fname = plot_kws.pop("fname", "")
    for ID in budget_methods:
        if ID not in tend.ID:
            continue
        thresh_i = thresh
        if (ID == "cartesian") and (thresh_cartesian is not None):
            thresh_i = thresh_cartesian

        ref = tend.sel(ID=ID, drop=True)
        dat = forcing.sel(ID=ID, drop=True)
        dat = tools.loc_data(dat, loc=loc, iloc=iloc)
        ref = tools.loc_data(ref, loc=loc, iloc=iloc)
        e = R2(dat, ref, dim=avg_dims_error).min().values
        err.append(e)

        if e < thresh_i:
            log = "test_budget for ID='{}': min. R2 less than {}: {:.10f}\n".format(
                ID, thresh_i, e)
            print(log)
            if plot:
                # axis label: first 8 characters of the description + "forcing"
                dat.name = dat.description[:8] + "forcing"
                ref.name = ref.description
                fname_i = fname
                if fname is not None:
                    # one file per ID; the file name also goes into the title
                    fname_i = "ID=" + ID + "_" + fname
                    log = fname_i + "\n" + log
                plotting.scatter_hue(dat, ref, title=log, fname=fname_i, **plot_kws)
            failed = True

    if not err:
        # explicit message instead of "min() arg is an empty sequence"
        raise ValueError("test_budget: none of the budget methods {} is available "
                         "in tend.ID".format(list(budget_methods)))
    return failed, min(err)
def test_decomp_sumcomp(adv, avg_dims_error=None, thresh=0.999995, loc=None, iloc=None,
                        plot=True, **plot_kws):
    """Check that "adv_r" - "mean" reproduces the resolved turbulent component "trb_r".

    Test that the total advective tendency is indeed the sum of the mean and
    resolved turbulent components in all three spatial directions. The
    comparison is done separately for every budget method (ID) in `adv`.

    The test fails if the coefficient of determination
    is below the given threshold. If avg_dims_error is given, the averaging
    in the R2 calculation is only carried out over these dimensions.
    Afterwards the minimum R2 value is taken over the remaining dimensions.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test.
        The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test.
        The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue. If "fname" is
        given and not None, it is prefixed with the ID for every plot.

    Returns
    -------
    failed : bool
        Test failed for at least one ID.
    err : float
        Test statistic R2 (smallest value over all IDs).

    """
    ref = tools.loc_data(adv.sel(comp="trb_r"), loc=loc, iloc=iloc)
    dat = tools.loc_data(adv.sel(comp="adv_r") - adv.sel(comp="mean"), loc=loc, iloc=iloc)
    fname = plot_kws.pop("fname", "")

    r2_per_id = []
    failed = False
    for method in dat.ID.values:
        dat_i = dat.sel(ID=method)
        ref_i = ref.sel(ID=method)
        r2 = R2(dat_i, ref_i, dim=avg_dims_error).min().values
        r2_per_id.append(r2)
        if not r2 < thresh:
            continue

        failed = True
        log = "decomp_sumcomp, {} (XYZ) for ID={}: min. R2 less than {}: {:.8f}".format(
            dat.description, method, thresh, r2)
        print(log)
        if plot:
            ref_i.name = "trb_r"
            dat_i.name = "adv_r - mean"
            fname_i = fname
            if fname is not None:
                # one file per ID; the file name also goes into the title
                fname_i = "ID=" + method + "_" + fname
                log = fname_i + "\n" + log
            plotting.scatter_hue(dat_i, ref_i, title=log, fname=fname_i, **plot_kws)

    return failed, min(r2_per_id)
def run_tests(datout, tests, dat_inst=None, sim_id=None, trb_exp=False,
              hor_avg=False, chunks=None, **kw):
    """Run test functions for WRF output postprocessed with WRFlux.

    Thresholds are hard-coded.

    Parameters
    ----------
    datout : nested dict
        Postprocessed output for all variables.
    tests : list of str
        Tests to perform.
        Choices: budget, decomp_sumdir, decomp_sumcomp, dz_out, adv_2nd, w, Y=0, NaN,
        dim_coords.
        If None, the module-level list ``all_tests`` is used.
        The given list is not modified.
    dat_inst : xarray DataArray, optional
        WRF instantaneous output needed for w test. If None, the w test is skipped.
        The default is None.
    sim_id : str, optional
        ID of the current test simulation. The default is None.
    trb_exp : bool, optional
        Turbulent fluxes were calculated explicitly. The default is False.
    hor_avg : bool, optional
        Horizontal averaging was used in postprocessing. The default is False.
    chunks : dict of integers, optional
        Mapping from dimension "x" and/or "y" to chunk sizes used in postprocessing.
        If given, the boundaries in the chunking directions are pruned.
        The default is None.
    **kw :
        Keyword arguments passed to test functions. The entry "figloc" is
        overwritten for every test and a "thresh" entry is removed after the
        budget, decomp_sumdir and decomp_sumcomp tests.

    Returns
    -------
    failed : pandas DataFrame
        "FAIL" and "pass" labels for all tests and variables.
    err : pandas DataFrame
        Error statistics returned by the performed tests.

    """
    # Work on a private copy: the list is pruned below and must neither alter
    # the caller's list nor the module-level default between calls.
    tests = list(all_tests if tests is None else tests)

    variables = list(datout.keys())
    failed = pd.DataFrame(columns=tests, index=variables)
    err = pd.DataFrame(columns=tests, index=variables)
    failed[:] = ""
    err[:] = ""

    # cut boundaries for non-periodic BC or if chunking was used
    attrs = datout[variables[0]]["flux"].attrs
    iloc = {}
    if (not attrs["PERIODIC_X"]) or (chunks is not None and "x" in chunks):
        iloc["x"] = slice(1, -1)
    if (not attrs["PERIODIC_Y"]) or (chunks is not None and "y" in chunks):
        iloc["y"] = slice(1, -1)
    if attrs["PERIODIC_Y"] == 0:
        if "Y=0" in tests:
            tests.remove("Y=0")

    # for w test: cut first time step
    # (dat_inst is optional; without it the w test is skipped further below)
    dat_inst_lim = None
    if dat_inst is not None:
        dat_inst_lim = dat_inst.isel(Time=slice(1, None), **iloc)

    datout_lim = {}
    for v, datout_v in datout.items():
        datout_lim[v] = {}
        for n, dat in datout_v.items():
            if "dim_coords" in tests:
                test_dim_coords(dat, dat_inst, v, n, failed)
            datout_lim[v][n] = tools.loc_data(dat, iloc=iloc)

    fpath = Path(__file__).parent
    for var, datout_v in datout_lim.items():
        print("Variable: " + var)
        figloc = fpath / "figures" / var
        failed_i = {}
        err_i = {}

        if "budget" in tests:
            # TODOm: change threshold depending on ID?
            tend = datout_v["tend"].sel(comp="tendency")
            forcing = datout_v["tend"].sel(comp="forcing")
            kw["figloc"] = figloc / "budget"
            # sim_id is optional: only do the substring check if it is given
            if (var == "w") and (sim_id is not None) and ("open BC y hor_avg" in sim_id):
                kw["thresh"] = 0.99
            elif (var == "t") and (attrs["USE_THETA_M"] == 1) and (attrs["OUTPUT_DRY_THETA_FLUXES"] == 1):
                # reduce threshold for WENO and monotonic advection as
                # dry theta budget is not perfectly closed
                if (attrs["SCALAR_ADV_OPT"] >= 3) and (attrs["MOIST_ADV_OPT"] >= 3):
                    kw["thresh"] = 0.95
                elif (attrs["SCALAR_ADV_OPT"] >= 3):
                    kw["thresh"] = 0.7
                elif attrs["MOIST_ADV_OPT"] == 2:
                    kw["thresh"] = 0.98
            failed_i["budget"], err_i["budget"] = test_budget(
                tend, forcing, **kw)
            # do not carry the threshold over to the next test
            if "thresh" in kw:
                del kw["thresh"]

        adv = datout_v["adv"]

        if "decomp_sumdir" in tests:
            if trb_exp or hor_avg or (attrs["HESSELBERG_AVG"] == 0):
                # reduce threshold
                kw["thresh"] = 0.992
            kw["figloc"] = figloc / "decomp_sumdir"
            failed_i["decomp_sumdir"], err_i["decomp_sumdir"] = test_decomp_sumdir(
                adv, datout_v["corr"], **kw)
            if "thresh" in kw:
                del kw["thresh"]

        if "decomp_sumcomp" in tests:
            if trb_exp:
                # reduce threshold for explicit turbulent fluxes
                kw["thresh"] = 0.995
            kw["figloc"] = figloc / "decomp_sumcomp"
            failed_i["decomp_sumcomp"], err_i["decomp_sumcomp"] = test_decomp_sumcomp(
                adv, **kw)
            if "thresh" in kw:
                del kw["thresh"]

        if ("dz_out" in tests) and (var != "q"):  # TODOm: why so bad for q?
            kw["figloc"] = figloc / "dz_out"
            failed_i["dz_out"], err_i["dz_out"] = test_dz_out(adv, **kw)

        if "adv_2nd" in tests:
            kw["figloc"] = figloc / "adv_2nd"
            failed_i["adv_2nd"], err_i["adv_2nd"] = test_2nd(adv, **kw)

        if ("w" in tests) and (var == variables[-1]) and (dat_inst is not None):
            # only do test once: for last variable
            kw["figloc"] = figloc / "w"
            failed_i["w"], err_i["w"] = test_w(dat_inst_lim, **kw)

        if "NaN" in tests:
            failed_i["NaN"] = test_nan(datout_v)

        if hor_avg and ("Y=0" in tests):
            failed_i["Y=0"], err_i["Y=0"] = test_y0(adv)

        # store results
        for test, f in failed_i.items():
            failed.loc[var, test] = "FAIL" if f else "pass"
        for test, e in err_i.items():
            err.loc[var, test] = e

    return failed, err
def test_budget(tend, forcing, avg_dims_error=None, thresh=0.9993, loc=None, iloc=None, plot=True, **plot_kws):
    """
    Check that the budget is closed, i.e. that tendency and forcing agree.

    Only the budget methods "native" and "cartesian" are tested; a method
    that is not contained in the ID coordinate of tend is skipped.
    The test fails if the Nash-Sutcliffe efficiency coefficient (NSE)
    is below the given threshold. If avg_dims_error is given, the averaging in the
    NSE calculation is only carried out over these dimensions. Afterwards the minimum NSE
    value is taken over the remaining dimensions.

    Parameters
    ----------
    tend : xarray DataArray
        Total tendency.
    forcing : xarray DataArray
        Total forcing.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails.
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic NSE (minimum over the tested budget methods).

    """
    any_failed = False
    nse_per_id = []
    for budget_id in ("native", "cartesian"):
        if budget_id not in tend.ID:
            continue

        tend_id = tend.sel(ID=budget_id, drop=True)
        forcing_id = forcing.sel(ID=budget_id, drop=True)
        forcing_id = tools.loc_data(forcing_id, loc=loc, iloc=iloc)
        tend_id = tools.loc_data(tend_id, loc=loc, iloc=iloc)

        nse_field = tools.nse(forcing_id, tend_id, dim=avg_dims_error)
        nse_min = nse_field.min().values
        nse_per_id.append(nse_min)

        if nse_min < thresh:
            any_failed = True
            message = "test_budget for ID='{}': min. NSE less than {}: {:.5f}\n".format(
                budget_id, thresh, nse_min)
            print(message)
            if plot:
                # label the axes of the scatter plot
                forcing_id.name = forcing_id.description[:2] + "forcing"
                tend_id.name = tend_id.description
                plotting.scatter_hue(forcing_id, tend_id, title=message, **plot_kws)

    return any_failed, min(nse_per_id)