예제 #1
0
def test_decomp_sumcomp(adv,
                        avg_dims_error=None,
                        thresh=0.9999999999,
                        loc=None,
                        iloc=None,
                        plot=True,
                        **plot_kws):
    """Check that mean + resolved turbulent advection reproduces the total.

    For every budget ID in ``adv`` the components "mean" and "trb_r" are
    summed and compared against the component "adv_r" using the
    Nash-Sutcliffe efficiency coefficient (NSE). If avg_dims_error is given,
    the NSE is averaged over these dimensions only and the minimum over the
    remaining dimensions is used. An ID fails if this minimum NSE is below
    the threshold.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        True if the test failed for at least one ID.
    err : float
        Smallest NSE found among all IDs.

    """
    summed_comps = ["mean", "trb_r"]
    nse_per_id = []
    any_failed = False
    for budget_id in adv.ID.values:
        total = adv.sel(ID=budget_id, comp="adv_r")
        comp_sum = adv.sel(ID=budget_id, comp=summed_comps).sum("comp")
        comp_sum = tools.loc_data(comp_sum, loc=loc, iloc=iloc)
        total = tools.loc_data(total, loc=loc, iloc=iloc)

        nse_min = tools.nse(comp_sum, total, dim=avg_dims_error).min().values
        nse_per_id.append(nse_min)
        # keep the comparison in this direction: a NaN score must not count as failure
        if not (nse_min < thresh):
            continue

        log = "decomp_sumcomp, {} (XYZ) for ID={}: min. NSE less than {}: {:.11f}".format(
            comp_sum.description, budget_id, thresh, nse_min)
        print(log)
        if plot:
            # names are used as axis labels in the scatter plot
            total.name = "adv_r"
            comp_sum.name = "mean + trb_r"
            plotting.scatter_hue(comp_sum, total, title=log, **plot_kws)
        any_failed = True

    return any_failed, min(nse_per_id)
예제 #2
0
def test_2nd(adv,
             avg_dims_error=None,
             thresh=0.999,
             loc=None,
             iloc=None,
             plot=True,
             **plot_kws):
    """Compare 2nd-order advective tendencies with the correct-order ones.

    The tendencies with ID "cartesian 2nd" are compared against those with
    ID "cartesian" in all three directions and components (usually carried
    out if the correct order is equal to 2nd order).

    The comparison uses the Nash-Sutcliffe efficiency coefficient (NSE).
    If avg_dims_error is given, the NSE is averaged over these dimensions
    only and the minimum over the remaining dimensions is used. The test
    fails if this minimum NSE is below the threshold.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic NSE

    """
    correct_order = adv.sel(ID="cartesian")
    second_order = adv.sel(ID="cartesian 2nd")
    second_order = tools.loc_data(second_order, loc=loc, iloc=iloc)
    correct_order = tools.loc_data(correct_order, loc=loc, iloc=iloc)

    nse_min = tools.nse(second_order, correct_order, dim=avg_dims_error).min().values
    # keep the comparison in this direction: a NaN score must not count as failure
    if not (nse_min < thresh):
        return False, nse_min

    log = "test_2nd, {} (XYZ): min. NSE less than {}: {:.5f}".format(
        second_order.description, thresh, nse_min)
    print(log)
    if plot:
        # names are used as axis labels in the scatter plot
        correct_order.name = "correct order"
        second_order.name = "2nd order"
        plotting.scatter_hue(second_order, correct_order, title=log, **plot_kws)
    return True, nse_min
예제 #3
0
def test_decomp_sumdir(adv, corr, avg_dims_error=None, thresh=0.99999,
                       loc=None, iloc=None, plot=True, **plot_kws):
    """Check that "native" and "cartesian" agree once all directions are summed.

    The direction sum (dir="sum") of the "cartesian" tendencies, reduced by
    the "T" part of the Cartesian corrections, is compared against the
    direction sum of the "native" tendencies for all components present
    in ``corr``.

    The comparison uses the coefficient of determination (R2). If
    avg_dims_error is given, the R2 is averaged over these dimensions only
    and the minimum over the remaining dimensions is used. The test fails if
    this minimum R2 is below the threshold.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    corr : xarray DataArray
        Cartesian corrections for advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2

    """
    dir_sum = adv.sel(dir="sum", comp=corr.comp)
    native = dir_sum.sel(ID="native")
    cartesian_uncorrected = dir_sum.sel(ID="cartesian")
    time_correction = corr.sel(ID="cartesian", dir="T")
    cartesian = cartesian_uncorrected - time_correction

    cartesian = tools.loc_data(cartesian, loc=loc, iloc=iloc)
    native = tools.loc_data(native, loc=loc, iloc=iloc)
    r2_min = R2(cartesian, native, dim=avg_dims_error).min().values

    # keep the comparison in this direction: a NaN score must not count as failure
    if not (r2_min < thresh):
        return False, r2_min

    log = "test_decomp_sumdir, {}: min. R2 less than {}: {:.7f}".format(cartesian.description, thresh, r2_min)
    print(log)
    if plot:
        # names are used as axis labels in the scatter plot
        cartesian.name = "cartesian"
        native.name = "native"
        plotting.scatter_hue(cartesian, native, title=log, **plot_kws)
    return True, r2_min
예제 #4
0
def test_dz_out(adv, avg_dims_error=None, thresh=0.95, loc=None, iloc=None, plot=True, **plot_kws):
    """Compare the "cartesian dz_out_z" tendencies with the "cartesian" ones.

    Tests that the Cartesian corrections imposed by the two budget methods
    lead to similar advective tendencies in all three directions and
    components.

    The comparison uses the coefficient of determination (R2). If
    avg_dims_error is given, the R2 is averaged over these dimensions only
    and the minimum over the remaining dimensions is used. The test fails if
    this minimum R2 is below the threshold.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2

    """
    reference = adv.sel(ID="cartesian")
    dz_out = adv.sel(ID="cartesian dz_out_z")
    dz_out = tools.loc_data(dz_out, loc=loc, iloc=iloc)
    reference = tools.loc_data(reference, loc=loc, iloc=iloc)

    r2_min = R2(dz_out, reference, dim=avg_dims_error).min().values
    # keep the comparison in this direction: a NaN score must not count as failure
    if not (r2_min < thresh):
        return False, r2_min

    log = "test_dz_out, {} (XYZ): min. R2 less than {}: {:.5f}".format(dz_out.description, thresh, r2_min)
    print(log)
    if plot:
        # names are used as axis labels in the scatter plot
        dz_out.name = "dz_out_z"
        reference.name = "reference corr."
        plotting.scatter_hue(dz_out, reference, title=log, **plot_kws)
    return True, r2_min
예제 #5
0
def test_w(dat_inst,
           avg_dims_error=None,
           thresh=0.995,
           loc=None,
           iloc=None,
           plot=True,
           **plot_kws):
    """Compare the diagnosed vertical velocity with the model vertical velocity.

    Tests that the instantaneous vertical velocity ("W") is very similar to
    the instantaneous diagnosed vertical velocity ("W_DIAG") used in the
    tendency calculations.

    The comparison uses the Nash-Sutcliffe efficiency coefficient (NSE).
    If avg_dims_error is given, the NSE is averaged over these dimensions
    only and the minimum over the remaining dimensions is used. The test
    fails if this minimum NSE is below the threshold.

    Parameters
    ----------
    dat_inst : xarray Dataset
        Instantaneous output; must provide the variables "W" and "W_DIAG".
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic NSE

    """
    selected = tools.loc_data(dat_inst, loc=loc, iloc=iloc)
    w_model = selected["W"]
    w_diag = selected["W_DIAG"]

    nse_min = tools.nse(w_diag, w_model, dim=avg_dims_error).min().values
    # keep the comparison in this direction: a NaN score must not count as failure
    if not (nse_min < thresh):
        return False, nse_min

    log = "test_w: min. NSE less than {}: {:.5f}".format(thresh, nse_min)
    print(log)
    if plot:
        plotting.scatter_hue(w_diag, w_model, title=log, **plot_kws)
    return True, nse_min
예제 #6
0
def scatter_hue(dat, ref, plot_diff=False, hue="bottom_top", ignore_missing_hue=True,
                discrete=True, cmap="gnuplot", iloc=None, loc=None, savefig=False, fname=None, figloc=None,
                close=False, title=None, **kwargs):
    """Scatter plot of dat vs. ref with coloring based on hue variable.

    Besides the scatter points, the plot contains a 1:1 line (unless
    plot_diff=True), a colorbar for the hue variable and a text box with the
    RMSE and R2 of dat vs. ref.

    Parameters
    ----------
    dat : xarray DataArray
        Data plotted on y-axis.
    ref : xarray DataArray
        Reference data plotted on x-axis.
    plot_diff : bool, optional
        Plot the difference dat-ref against ref. The default is False.
    hue : str, optional
        Hue variable. All xarray dimensions are allowed. "_stag" is automatically appended,
        if necessary. The default is "bottom_top".
    ignore_missing_hue : bool, optional
        If hue variable is not available, use default instead of raising an exception.
        The default is True.
    discrete : bool, optional
        Use discrete colorbar with labels for all coordinate values.
        Forced to True if the hue coordinate cannot be converted to integers.
        The default is True.
    cmap : str, optional
        Name of the colormap for the plot. The default is 'gnuplot'.
    loc : dict, optional
        Mapping for label based indexing before plotting. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before plotting. The default is None.
    savefig : bool, optional
        Save figure to disk. The default is False.
    fname : str, optional
        File name of plot if savefig=True. If no file type extension is included, use png.
        The default is None, in which case "scatter" is used.
    figloc : str or path-like, optional
        Directory to save plot in. Created if it does not exist.
        Defaults to the directory of this script.
    close : bool, optional
        Close the figure after creation. The default is False.
    title : str, optional
        Title of the plot. The default is None.
    **kwargs :
        keyword argument passed to plt.scatter.

    Returns
    -------
    fig : matplotlib figure
    ax : matplotlib axes
    cax : matplotlib axes
        Colorbar axes.

    """
    dat = tools.loc_data(dat, loc=loc, iloc=iloc)
    ref = tools.loc_data(ref, loc=loc, iloc=iloc)
    # index 0 along "concat_dim" holds the y data, index 1 the x data
    pdat = xr.concat([dat, ref], "concat_dim")

    if plot_diff:
        pdat[0] = dat - ref

    # fall back to the default hue if the requested one is not a dimension
    if ignore_missing_hue:
        if ((hue not in pdat.dims) and (hue + "_stag" not in pdat.dims)):
            hue = "bottom_top"
    if (hue not in pdat.dims) and (hue + "_stag" in pdat.dims):
        hue = hue + "_stag"

    # create integer hue variable to allow non-numeric hue variables
    n_hue = len(pdat[hue])
    hue_int = np.arange(n_hue)
    pdat = pdat.assign_coords(hue=(hue, hue_int))
    pdatf = pdat[0].stack(s=pdat[0].dims)

    # set color
    if ("bottom_top" in hue) and (not discrete):
        # negated so that the colorbar can be labelled with the highest value at the bottom
        color = -pdatf[hue]
    elif (hue == "Time") and (not discrete):
        # use integer hue variable to prevent error
        color = pdatf["hue"]
    else:
        color = pdatf[hue]
        try:
            color.astype(int)  # check if hue is numeric
        except ValueError:
            discrete = True
        if discrete:
            cmap = plt.get_cmap(cmap, n_hue)
            # use integer hue variable to prevent error
            color = pdatf["hue"]

    kwargs.setdefault("cmap", cmap)

    fig, ax = plt.subplots()
    kwargs.setdefault("s", 10)
    p = plt.scatter(pdat[1], pdat[0], c=color.values, **kwargs)

    # set x and y labels: prefer the array name, then the description attribute
    labels = []
    for d in [ref, dat]:
        label = ""
        if d.name is not None:
            label = d.name
        elif "description" in d.attrs:
            label = d.description
        labels.append(label)
    if plot_diff and (labels[0] != "") and (labels[1] != ""):
        labels[1] = "{} - {}".format(labels[1], labels[0])
    # add units
    for i, d in enumerate([ref, dat]):
        if (labels[i] != "") and ("units" in d.attrs):
            labels[i] += " ({})".format(d.units)
    plt.xlabel(labels[0])
    plt.ylabel(labels[1])

    # only consider points that are valid in both x and y for the axis limits
    for i in [0, 1]:
        pdat = pdat.where(~pdat[i].isnull())
    # set axis limits equal for x and y axis
    if not plot_diff:
        minmax = [pdat.min(), pdat.max()]
        # take full range of data increased by 3%
        dist = minmax[1] - minmax[0]
        minmax[0] -= 0.03 * dist
        minmax[1] += 0.03 * dist
        plt.plot(minmax, minmax, c="gray", label="1:1")
        plt.legend()
        ax.set_xlim(minmax)
        ax.set_ylim(minmax)

    # colorbar
    cax = fig.add_axes([0.92, 0.125, 0.05, .75], frameon=True)
    cax.set_yticks([])
    cax.set_xticks([])
    clabel = hue
    if "bottom_top" in hue:
        # raw string: "\e" is not a valid escape sequence in a normal string literal
        clabel = r"$\eta$"
    if ("bottom_top" in hue) and (not discrete):
        cb = plt.colorbar(p, cax=cax, label=clabel)
        # highest value must be at bottom
        cb.set_ticks(np.arange(-0.8, -0.2, 0.2))
        cb.set_ticklabels(np.linspace(0.8, 0.2, 4).round(1))
    else:
        cb = plt.colorbar(p, cax=cax, label=clabel)
        if discrete:
            # set ticks for all hue values
            if n_hue > 1:
                d = (n_hue - 1) / n_hue
                cb.set_ticks(np.arange(d / 2, n_hue - 1, d))
            else:
                cb.set_ticks([0])
            cb.set_ticklabels(pdat[hue].values)

    # labels for error stats
    err = abs(dat - ref)
    rmse = (err**2).mean().values**0.5
    r = testing.R2(dat, ref)
    ax.text(0.74, 0.07, "RMSE={0:.2E}\nR$^2$={1:.7f}".format(rmse, r.values),
            horizontalalignment='left', verticalalignment='bottom', transform=ax.transAxes)
    if title is not None:
        fig.suptitle(title)

    if savefig:
        if figloc is None:
            figloc = Path(__file__).parent
        else:
            os.makedirs(figloc, exist_ok=True)
        if fname is None:
            fname = "scatter"
        fpath = Path(figloc) / fname
        try:
            fig.savefig(fpath, dpi=300, bbox_inches="tight")
        except ValueError:
            # retry with an explicit png extension if saving under the given name failed
            fig.savefig(str(fpath) + ".png", dpi=300, bbox_inches="tight")

    plt.show(block=False)
    if close:
        # close this figure explicitly instead of whichever figure is current
        plt.close(fig)

    return fig, ax, cax
예제 #7
0
def run_tests(datout, tests, dat_mean=None, dat_inst=None, sim_id="", trb_exp=False,
              hor_avg=False, chunks=None, figloc=None, **kw):
    """Run test functions for WRF output postprocessed with WRFlux.
       Thresholds are hard-coded.

    Parameters
    ----------
    datout : nested dict
        Postprocessed output for all variables.
    tests : list of str or None
        Tests to perform. Duplicates are dropped while keeping the given order.
        If None, all tests in testing.all_tests are run.
        Choices: testing.all_tests
    dat_mean : xarray Dataset
        WRF time-averaged output. Required for the test "adv_form".
    dat_inst : xarray DataArray, optional
        WRF instantaneous output needed for the tests "w" and "dim_coords".
        The default is None.
    sim_id : str, optional
        ID of the current test simulation. The default is "".
    trb_exp : bool, optional
        Turbulent fluxes were calculated explicitly. The default is False.
    hor_avg : bool, optional
        Horizontal averaging was used in postprocessing. The default is False.
    chunks : dict of integers, optional
        Mapping from dimension "x" and/or "y" to chunk sizes used in postprocessing.
        If given, the boundaries in the chunking directions are pruned.
        The default is None.
    figloc : str or path-like, optional
        Directory to save plot in. Defaults to the parent directory of this script.
    **kw :
        Keyword arguments passed to test functions. The keys "figloc", "thresh"
        and "thresh_cartesian" are set and removed internally.

    Raises
    ------
    ValueError
        If an unknown test is requested, if dat_inst is missing for the tests
        "w" or "dim_coords", or if dat_mean is missing for the test "adv_form".

    Returns
    -------
    failed : pandas DataFrame
        "FAIL" and "pass" labels for all tests and variables.
    err : pandas DataFrame
        R2 error statistics for performed tests.

    """
    if tests is None:
        tests = all_tests.copy()
    else:
        # drop duplicates; dict.fromkeys keeps the given order so that the
        # column order of the returned DataFrames is reproducible
        tests = list(dict.fromkeys(tests))
    for test in tests:
        if test not in all_tests:
            raise ValueError("Test {} not available! Available tests:\n{}".format(test, ", ".join(all_tests)))
    variables = list(datout.keys())
    failed = pd.DataFrame(columns=tests, index=variables)
    err = pd.DataFrame(columns=tests, index=variables)
    failed[:] = ""
    err[:] = ""

    # cut boundaries for non-periodic BC or if chunking was used
    attrs = datout[variables[0]]["flux"].attrs
    iloc = {}
    if (not attrs["PERIODIC_X"]) or (chunks is not None and "x" in chunks):
        iloc["x"] = slice(1, -1)
    if (not attrs["PERIODIC_Y"]) or (chunks is not None and "y" in chunks):
        iloc["y"] = slice(1, -1)

    # the Y=0 test is skipped for non-periodic BC in y (its result column stays empty)
    if attrs["PERIODIC_Y"] == 0:
        if "Y=0" in tests:
            tests.remove("Y=0")
    cyclic = {d: bool(attrs["PERIODIC_{}".format(d.upper())]) for d in tools.xy}
    cyclic["bottom_top"] = False

    # horizontal dimensions missing in the output are the averaging dimensions
    avg_dims = None
    if hor_avg:
        avg_dims = []
        dat = datout[variables[0]]["tend"]["adv"]
        for d in tools.xy:
            if (d not in dat.dims) and (d + "_stag" not in dat.dims):
                avg_dims.append(d)

    # for w test: cut first time step
    if dat_inst is not None:
        dat_inst_lim = dat_inst.isel(Time=slice(1, None), **iloc)
    elif ("w" in tests) or ("dim_coords" in tests):
        raise ValueError("For tests 'w' and 'dim_coords', dat_inst needs to be given!")
    datout_lim = {}
    for v, datout_v in datout.items():
        datout_lim[v] = {}
        for n, dat in datout_v.items():
            if "ID" in dat.dims:
                # NOTE(review): splitting and re-joining on a single space leaves
                # each label unchanged; the coordinate is still reassigned in place
                dat["ID"] = [" ".join(ID.split(" ")) for ID in dat.ID.values]
            if "dim_coords" in tests:
                test_dim_coords(dat, dat_inst, v, n, failed)
            if hor_avg:
                for avg_dim in avg_dims:
                    for stag in ["", "_stag"]:
                        assert avg_dim + stag not in dat.dims
            datout_lim[v][n] = tools.loc_data(dat, iloc=iloc)

    if figloc is None:
        fpath = Path(__file__).parent
    else:
        fpath = Path(figloc)
    for var, datout_v in datout_lim.items():
        print("Variable: " + var)
        figloc = fpath / "figures" / var
        failed_i = {}
        err_i = {}
        if dat_mean is not None:
            dat_mean_v = dat_mean.sel(Time=datout_v["tend"]["Time"])

        if "budget" in tests:
            tend = datout_v["tend"]["net"].sel(side="tendency")
            forcing = datout_v["tend"]["net"].sel(side="forcing")
            kw["figloc"] = figloc / "budget"
            if (var == "w") and ("open BC y hor_avg" in sim_id):
                kw["thresh"] = 0.995
            elif (var in ["u", "v", "w"]) and ("open BC" in sim_id):
                kw["thresh"] = 0.999
            elif var == "t":
                if "open BC" in sim_id:
                    kw["thresh"] = 0.999
                if "symmetric BC" in sim_id:
                    kw["thresh"] = 0.995
                elif attrs["USE_THETA_M"] == 1:

                    if attrs["OUTPUT_DRY_THETA_FLUXES"] == 0:
                        # lower thresh as cartesian tendency for thm is close to 0
                        if attrs["MP_PHYSICS"] > 0:
                            kw["thresh_cartesian"] = 0.96
                            kw["thresh"] = 0.9998
                        else:
                            kw["thresh_cartesian"] = 0.995

                    # reduce threshold for WENO and monotonic advection as
                    # dry theta budget is not perfectly closed
                    elif (attrs["SCALAR_ADV_OPT"] >= 3) or (attrs["MOIST_ADV_OPT"] >= 3):
                        kw["thresh"] = 0.88
                    elif attrs["MOIST_ADV_OPT"] == 2:
                        kw["thresh"] = 0.96

            failed_i["budget"], err_i["budget"] = test_budget(tend, forcing, **kw)
            # thresholds are test specific: do not leak them into the next test
            for thresh in ["thresh", "thresh_cartesian"]:
                kw.pop(thresh, None)
        adv = datout_v["tend"]["adv"]
        if "decomp_sumdir" in tests:
            if attrs["HESSELBERG_AVG"] == 0:
                kw["thresh"] = 0.995
            elif trb_exp:
                kw["thresh"] = 0.999
            kw["figloc"] = figloc / "decomp_sumdir"
            failed_i["decomp_sumdir"], err_i["decomp_sumdir"] = test_decomp_sumdir(
                adv, datout_v["corr"], **kw)
            kw.pop("thresh", None)

        if "decomp_sumcomp" in tests:
            if trb_exp:
                # reduce threshold for explicit turbulent fluxes
                kw["thresh"] = 0.999
            kw["figloc"] = figloc / "decomp_sumcomp"
            failed_i["decomp_sumcomp"], err_i["decomp_sumcomp"] = test_decomp_sumcomp(adv, **kw)
            kw.pop("thresh", None)

        if ("dz_out" in tests) and (var != "q"):  # TODOm: why so bad for q?
            kw["figloc"] = figloc / "dz_out"
            adv_noavgdir = adv
            if hor_avg:
                thresh = {"t": 0.85, "u": 0.7, "v": 0.995, "w": 0.92}
                kw["thresh"] = thresh[var]
                # skip the directions over which was averaged
                adv_noavgdir = adv.sel(dir=[d for d in adv.dir.values if d.lower() not in avg_dims])
            failed_i["dz_out"], err_i["dz_out"] = test_dz_out(adv_noavgdir, **kw)
            kw.pop("thresh", None)

        if "adv_2nd" in tests:
            kw["figloc"] = figloc / "adv_2nd"
            failed_i["adv_2nd"], err_i["adv_2nd"] = test_2nd(adv, **kw)

        if ("w" in tests) and (var == variables[-1]):
            # only do test once: for last variable
            kw["figloc"] = figloc / "w"
            failed_i["w"], err_i["w"] = test_w(dat_inst_lim, **kw)

        if ("mass" in tests) and (var == "t"):
            if "dz_out" in tests:
                if hor_avg:
                    kw["thresh"] = 0.85
                else:
                    kw["thresh"] = 0.995

            elif attrs["HESSELBERG_AVG"] == 0:
                kw["thresh"] = 0.99998

            kw["figloc"] = figloc / "mass"
            failed_i["mass"], err_i["mass"] = test_mass(datout_v["tend_mass"], **kw)
            kw.pop("thresh", None)

        if "adv_form" in tests:
            kw["figloc"] = figloc / "adv_form"
            if var in ["u", "w"]:
                kw["thresh"] = 0.995
            if dat_mean is None:
                raise ValueError("For adv_form test, dat_mean needs to be given!")
            failed_i["adv_form"], err_i["adv_form"] = test_adv_form(dat_mean_v, datout_v, var, cyclic,
                                                                    hor_avg=hor_avg, avg_dims=avg_dims, **kw)
            kw.pop("thresh", None)
        if "periodic" in tests:
            # every test gets its own figure folder (this one used to share "mass")
            kw["figloc"] = figloc / "periodic"
            failed_i["periodic"] = test_periodic(datout_v, attrs, **kw)
        if "NaN" in tests:
            failed_i["NaN"] = test_nan(datout_v)

        if "sgs" in tests:
            # compare the direction-summed SGS tendencies of the first two entries
            sgs_sum = datout_v["tend"]["adv"].sel(comp="trb_s").sum("dir")
            failed_i["sgs"] = not np.allclose(sgs_sum[0], sgs_sum[1], atol=1e-7, rtol=1e-5)

        if hor_avg and ("Y=0" in tests):
            failed_i["Y=0"], err_i["Y=0"] = test_y0(adv)

        # store results
        for test, f in failed_i.items():
            failed.loc[var, test] = "FAIL" if f else "pass"

        for test, e in err_i.items():
            err.loc[var, test] = e

    return failed, err
예제 #8
0
def test_adv_form(dat_mean, datout, var, cyclic=None, hor_avg=False, avg_dims=None,
                  avg_dims_error=None, thresh=0.9995, loc=None, iloc=None, plot=True, **plot_kws):
    """Compare implicit and explicit advective form calculations

    Explicitly calculate 2nd order mean advection in advective form and compare with
    implicit calculation (ID "cartesian adv_form", component "mean").

    The test fails if the coefficient of determination (R2) is below the
    given threshold. If avg_dims_error is given, the averaging in the R2
    calculation is only carried out over these dimensions. Afterwards the
    minimum R2 value is taken over the remaining dimensions.

    Note that the coordinates "bottom_top" and "bottom_top_stag" of dat_mean
    are overwritten in place with the ones of the flux data.

    Parameters
    ----------
    dat_mean : xarray Dataset
        WRF time-averaged output.
    datout : dict
        Postprocessed output for variable var. The entries "tend", "flux" and
        "grid" are used.
    var : str
        Variable to process.
    cyclic : dict of booleans for xy or None, optional
        Defines which dimensions have periodic boundary conditions.
        Use periodic boundary conditions to fill lateral boundary points.
        Dimensions without an entry (and all dimensions if None is given)
        are treated as non-periodic when calculating gradients.
        The default is None.
    hor_avg : bool, optional
        Horizontal averaging was used in postprocessing. The default is False.
    avg_dims : str or list of str, optional
        Averaging dimensions if hor_avg=True. The default is None.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2

    """

    adv, flux, grid = datout["tend"]["adv"], datout["flux"], datout["grid"]
    dat_mean["bottom_top"] = flux["bottom_top"]
    dat_mean["bottom_top_stag"] = flux["bottom_top_stag"]
    vmean = xr.Dataset({"X": dat_mean["U_MEAN"], "Y": dat_mean["V_MEAN"], "Z": dat_mean["WD_MEAN"]})
    if var == "w":
        v = "ZWIND"
    else:
        v = var.upper()
    var_mean = dat_mean[v + "_MEAN"]
    if hor_avg:
        var_mean = tools.avg_xy(var_mean, avg_dims, cyclic=cyclic)

    vmean_c = xr.Dataset()
    dd = xr.Dataset()
    grad = xr.Dataset()
    tend = xr.Dataset()
    for dim in tools.XYZ:
        if hor_avg:
            vmean[dim] = tools.avg_xy(vmean[dim], avg_dims, cyclic=cyclic, **grid[tools.stagger_const])
        ds = dim.lower()
        if dim == "Z":
            ds = "bottom_top"
        # the documented default (None) and dicts without an entry for this
        # dimension must not crash here: fall back to non-periodic
        cyc = False if cyclic is None else cyclic.get(ds, False)
        # d: dimension to differentiate along, ds: dimension of the result
        # (one of them staggered, the other one not)
        d = ds
        if ds in var_mean.dims:
            ds = ds + "_stag"
        else:
            d = d + "_stag"
        if dim == "Z":
            dd[dim] = tools.diff(grid["Z_STAG"], d, new_coord=flux[ds], cyclic=cyc)
        else:
            dd[dim] = grid["D" + dim]

        if d in adv.dims:
            grad[dim] = tools.diff(var_mean, d, new_coord=flux[ds], cyclic=cyc) / dd[dim]
            vmean_c[dim] = tools.stagger_like(vmean[dim], ref=grad[dim], cyclic=cyclic, **grid[tools.stagger_const])

    # advective form: - velocity * gradient, brought to the grid of adv
    for dim in tools.XYZ:
        if dim in grad:
            adv_s = - vmean_c[dim] * grad[dim]
            tend[dim] = tools.stagger_like(adv_s, ref=adv, cyclic=cyclic, **grid[tools.stagger_const])
    # subtract the corrections built from the vertical gradient and dzdt_x/dzdt_y
    for dim in ["X", "Y"]:
        if dim in grad:
            corr = grid[f"dzdt_{dim.lower()}"]
            corr = grad["Z"]*tools.stagger_like(corr, ref=grad["Z"], cyclic=cyclic, **grid[tools.stagger_const])
            corr = tools.stagger_like(corr, ref=adv, cyclic=cyclic, **grid[tools.stagger_const])
            tend[dim] = tend[dim] - corr

    tend = tend.to_array("dir")

    # fname is passed on explicitly and also prepended to the plot title
    fname = plot_kws.pop("fname", None)

    dat = tools.loc_data(adv.sel(ID="cartesian adv_form", dir=["X", "Y", "Z"], comp="mean"), loc=loc, iloc=iloc)
    ref = tools.loc_data(tend, loc=loc, iloc=iloc)
    # only compare the directions for which an explicit tendency was calculated
    dat = dat.sel(dir=ref.dir)
    if var == "w":
        # skip the lowest staggered level for w
        dat = dat.isel(bottom_top_stag=slice(1, None))
        ref = ref.isel(bottom_top_stag=slice(1, None))
    err = R2(dat, ref, dim=avg_dims_error).min().values
    failed = False
    if err < thresh:
        failed = True
        log = "test_adv_form: mean advective component: min. R2 less than {}: {:.10f}".format(thresh, err)
        print(log)
        if plot:
            dat.name = "Implicit calculation"
            ref.name = "Explicit calculation"
            if fname is not None:
                log = fname + "\n" + log
            plotting.scatter_hue(dat, ref, title=log, fname=fname, **plot_kws)
    return failed, err
예제 #9
0
def test_mass(tend_mass, avg_dims_error=None, thresh=0.99999999,
              loc=None, iloc=None, plot=True, **plot_kws):
    """Check that the continuity equation is closed.

    The tendency calculations obtain the vertical component of the continuity
    equation as a residual, which improves the budget closure and closes the
    continuity equation automatically. To make sure that this residual does
    not deviate strongly from the real vertical component, it is compared
    here with the explicit calculation based on the vertical velocity.
    The continuity equation cannot be well closed for the dz_out type
    formulations; for these, only the individual components are compared with
    the standard Cartesian formulation.

    For every ID the coefficient of determination (R2) between the residual
    (``T - X - Y``) and the explicit vertical component (``Z``) is computed.
    If avg_dims_error is given, R2 is only averaged over these dimensions and
    the minimum over all remaining dimensions is used. The test fails as soon
    as this value drops below the threshold for any ID.

    Parameters
    ----------
    tend_mass : xarray DataArray
        Components of continuity equation.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue. An optional
        ``fname`` entry is prefixed with the ID before it is passed on.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2 (minimum over all IDs)

    """
    explicit = tend_mass.sel(dir="Z")
    residual = tend_mass.sel(dir="T") - tend_mass.sel(dir="X") - tend_mass.sel(dir="Y")
    base_fname = plot_kws.pop("fname", "")

    scores = []
    failed = False
    for ID in residual.ID.values:
        residual_id = tools.loc_data(residual.sel(ID=ID), loc=loc, iloc=iloc)
        explicit_id = tools.loc_data(explicit.sel(ID=ID), loc=loc, iloc=iloc)
        score = R2(residual_id, explicit_id, dim=avg_dims_error).min().values
        scores.append(score)
        if not (score < thresh):
            continue

        # threshold violated for this ID: report and optionally plot
        failed = True
        log = "test_mass: vertical component of continuity equation\n for ID={}: min. R2 less than {}: {:.10f}".format(ID, thresh, score)
        print(log)
        if not plot:
            continue

        residual_id.name = "Residual calculation"
        explicit_id.name = "Calculation with vertical velocity"
        if base_fname is None:
            plot_fname = base_fname
        else:
            # one file name per ID; it is also shown in the plot title
            plot_fname = "ID=" + ID + "_" + base_fname
            log = plot_fname + "\n" + log
        plotting.scatter_hue(residual_id, explicit_id, title=log, fname=plot_fname, **plot_kws)

    return failed, min(scores)
예제 #10
0
def test_budget(tend, forcing, avg_dims_error=None, thresh=0.9999, thresh_cartesian=None,
                budget_methods=("native", "adv_form", "cartesian", "cartesian adv_form"),
                loc=None, iloc=None, plot=True, **plot_kws):
    """
    Test closure of budget: tend = forcing.

    The test fails if the coefficient of determination
    is below the given threshold. If avg_dims_error is given, the averaging in the
    R2 calculation is only carried out over these dimensions. Afterwards the minimum R2
    value is taken over the remaining dimensions.

    Parameters
    ----------
    tend : xarray DataArray
        Total tendency.
    forcing : xarray DataArray
        Total forcing.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails
    thresh_cartesian : float, optional
        Use different threshold value for the budget method "cartesian".
        The default is None, for which 'thresh' is used for all methods.
    budget_methods : sequence of str
        Budget methods to consider. By default, only "native", "adv_form", "cartesian", and "cartesian adv_form" are tested.
        Methods that are not contained in tend.ID are skipped.
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue. An optional
        ``fname`` entry is prefixed with the ID before it is passed on.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2 (minimum over all tested budget methods)

    Raises
    ------
    ValueError
        If none of the requested budget methods is available in tend.ID,
        i.e. nothing could be tested.

    """
    failed = False
    err = []
    fname = plot_kws.pop("fname", "")
    for ID in budget_methods:
        if ID not in tend.ID:
            continue
        thresh_i = thresh
        if (ID == "cartesian") and (thresh_cartesian is not None):
            thresh_i = thresh_cartesian
        ref = tend.sel(ID=ID, drop=True)
        dat = forcing.sel(ID=ID, drop=True)
        dat = tools.loc_data(dat, loc=loc, iloc=iloc)
        ref = tools.loc_data(ref, loc=loc, iloc=iloc)
        e = R2(dat, ref, dim=avg_dims_error).min().values
        err.append(e)

        if e < thresh_i:
            log = "test_budget for ID='{}': min. R2 less than {}: {:.10f}\n".format(ID, thresh_i, e)
            print(log)
            if plot:
                dat.name = dat.description[:8] + "forcing"
                ref.name = ref.description
                fname_i = fname
                if fname is not None:
                    # one file name per ID; it is also shown in the plot title
                    fname_i = "ID=" + ID + "_" + fname
                    log = fname_i + "\n" + log
                plotting.scatter_hue(dat, ref, title=log, fname=fname_i, **plot_kws)
            failed = True

    if not err:
        # min() of an empty list would raise an uninformative ValueError
        raise ValueError(
            "test_budget: none of the requested budget methods {} is available in tend.ID".format(
                tuple(budget_methods)))

    return failed, min(err)
예제 #11
0
def test_decomp_sumcomp(adv, avg_dims_error=None, thresh=0.999995,
                        loc=None, iloc=None, plot=True, **plot_kws):
    """Check the decomposition of the advective tendency into its components.

    In all three spatial directions the total advective tendency has to be
    the sum of the mean and the resolved turbulent component. This is
    verified by comparing ``adv_r - mean`` with ``trb_r`` for every ID.

    The coefficient of determination (R2) is used as test statistic. If
    avg_dims_error is given, R2 is only averaged over these dimensions and
    the minimum over all remaining dimensions is used. The test fails as soon
    as this value drops below the threshold for any ID.

    Parameters
    ----------
    adv : xarray DataArray
        Advective tendencies.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the R2. The default is None.
    thresh : float, optional
        Threshold value for R2 below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue. An optional
        ``fname`` entry is prefixed with the ID before it is passed on.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic R2 (minimum over all IDs)

    """
    trb = adv.sel(comp="trb_r")
    diff = adv.sel(comp="adv_r") - adv.sel(comp="mean")
    diff = tools.loc_data(diff, loc=loc, iloc=iloc)
    trb = tools.loc_data(trb, loc=loc, iloc=iloc)
    base_fname = plot_kws.pop("fname", "")

    scores = []
    failed = False
    for ID in diff.ID.values:
        diff_id = diff.sel(ID=ID)
        trb_id = trb.sel(ID=ID)
        score = R2(diff_id, trb_id, dim=avg_dims_error).min().values
        scores.append(score)
        if not (score < thresh):
            continue

        # threshold violated for this ID: report and optionally plot
        failed = True
        log = "decomp_sumcomp, {} (XYZ) for ID={}: min. R2 less than {}: {:.8f}".format(
            diff.description, ID, thresh, score)
        print(log)
        if not plot:
            continue

        trb_id.name = "trb_r"
        diff_id.name = "adv_r - mean"
        if base_fname is None:
            plot_fname = base_fname
        else:
            # one file name per ID; it is also shown in the plot title
            plot_fname = "ID=" + ID + "_" + base_fname
            log = plot_fname + "\n" + log
        plotting.scatter_hue(diff_id, trb_id, title=log, fname=plot_fname, **plot_kws)

    return failed, min(scores)
예제 #12
0
def run_tests(datout,
              tests,
              dat_inst=None,
              sim_id=None,
              trb_exp=False,
              hor_avg=False,
              chunks=None,
              **kw):
    """Run test functions for WRF output postprocessed with WRFlux.
       Thresholds are hard-coded.

    Parameters
    ----------
    datout : nested dict
        Postprocessed output for all variables.
    tests : list of str or None
        Tests to perform. If None, all tests in `all_tests` are performed.
        The given list is not modified.
        Choices: budget, decomp_sumdir, decomp_sumcomp, dz_out, adv_2nd, w, Y=0, NaN, dim_coords
    dat_inst : xarray DataArray, optional
        WRF instantaneous output needed for w test. The default is None,
        for which the w test is skipped.
    sim_id : str, optional
        ID of the current test simulation. The default is None.
    trb_exp : bool, optional
        Turbulent fluxes were calculated explicitly. The default is False.
    hor_avg : bool, optional
        Horizontal averaging was used in postprocessing. The default is False.
    chunks : dict of integers, optional
        Mapping from dimension "x" and/or "y" to chunk sizes used in postprocessing.
        If given, the boundaries in the chunking directions are pruned.
        The default is None.
    **kw :
        Keyword arguments passed to test functions.

    Returns
    -------
    failed : pandas DataFrame
        "FAIL" and "pass" labels for all tests and variables.
    err : pandas DataFrame
        Error statistics returned by the performed tests.

    """
    # Work on a private copy: "Y=0" may be removed below and neither the
    # caller's list nor the module-level default list must be altered.
    tests = list(all_tests if tests is None else tests)

    variables = list(datout.keys())
    failed = pd.DataFrame(columns=tests, index=variables)
    err = pd.DataFrame(columns=tests, index=variables)
    failed[:] = ""
    err[:] = ""

    # cut boundaries for non-periodic BC or if chunking was used
    attrs = datout[variables[0]]["flux"].attrs
    iloc = {}
    if (not attrs["PERIODIC_X"]) or (chunks is not None and "x" in chunks):
        iloc["x"] = slice(1, -1)
    if (not attrs["PERIODIC_Y"]) or (chunks is not None and "y" in chunks):
        iloc["y"] = slice(1, -1)

    if attrs["PERIODIC_Y"] == 0:
        if "Y=0" in tests:
            tests.remove("Y=0")

    # for w test: cut first time step
    # (dat_inst is optional; without it the w test is skipped further below)
    dat_inst_lim = None
    if dat_inst is not None:
        dat_inst_lim = dat_inst.isel(Time=slice(1, None), **iloc)

    datout_lim = {}
    for v, datout_v in datout.items():
        datout_lim[v] = {}
        for n, dat in datout_v.items():
            if "dim_coords" in tests:
                test_dim_coords(dat, dat_inst, v, n, failed)
            datout_lim[v][n] = tools.loc_data(dat, iloc=iloc)

    fpath = Path(__file__).parent
    for var, datout_v in datout_lim.items():
        print("Variable: " + var)
        figloc = fpath / "figures" / var
        failed_i = {}
        err_i = {}

        if "budget" in tests:
            # TODOm: change threshold depending on ID?
            tend = datout_v["tend"].sel(comp="tendency")
            forcing = datout_v["tend"].sel(comp="forcing")
            kw["figloc"] = figloc / "budget"
            # sim_id is optional: only inspect it if it was given
            if (var == "w") and (sim_id is not None) and ("open BC y hor_avg" in sim_id):
                kw["thresh"] = 0.99
            elif (var == "t") and (attrs["USE_THETA_M"]
                                   == 1) and (attrs["OUTPUT_DRY_THETA_FLUXES"]
                                              == 1):
                # reduce threshold for WENO and monotonic advection as
                # dry theta budget is not perfectly closed
                if (attrs["SCALAR_ADV_OPT"] >= 3) and (attrs["MOIST_ADV_OPT"]
                                                       >= 3):
                    kw["thresh"] = 0.95
                elif (attrs["SCALAR_ADV_OPT"] >= 3):
                    kw["thresh"] = 0.7
                elif attrs["MOIST_ADV_OPT"] == 2:
                    kw["thresh"] = 0.98

            failed_i["budget"], err_i["budget"] = test_budget(
                tend, forcing, **kw)
            # the threshold is test specific: do not leak it into later tests
            kw.pop("thresh", None)

        adv = datout_v["adv"]
        if "decomp_sumdir" in tests:
            if trb_exp or hor_avg or (attrs["HESSELBERG_AVG"] == 0):
                # reduce threshold
                kw["thresh"] = 0.992
            kw["figloc"] = figloc / "decomp_sumdir"
            failed_i["decomp_sumdir"], err_i[
                "decomp_sumdir"] = test_decomp_sumdir(adv, datout_v["corr"],
                                                      **kw)
            kw.pop("thresh", None)

        if "decomp_sumcomp" in tests:
            if trb_exp:
                # reduce threshold for explicit turbulent fluxes
                kw["thresh"] = 0.995
            kw["figloc"] = figloc / "decomp_sumcomp"
            failed_i["decomp_sumcomp"], err_i[
                "decomp_sumcomp"] = test_decomp_sumcomp(adv, **kw)
            kw.pop("thresh", None)

        if ("dz_out" in tests) and (var != "q"):  # TODOm: why so bad for q?
            kw["figloc"] = figloc / "dz_out"
            failed_i["dz_out"], err_i["dz_out"] = test_dz_out(adv, **kw)

        if "adv_2nd" in tests:
            kw["figloc"] = figloc / "adv_2nd"
            failed_i["adv_2nd"], err_i["adv_2nd"] = test_2nd(adv, **kw)

        if ("w" in tests) and (var == variables[-1]) and (dat_inst
                                                          is not None):
            # only do test once: for last variable
            kw["figloc"] = figloc / "w"
            failed_i["w"], err_i["w"] = test_w(dat_inst_lim, **kw)

        if "NaN" in tests:
            failed_i["NaN"] = test_nan(datout_v)

        if hor_avg and ("Y=0" in tests):
            failed_i["Y=0"], err_i["Y=0"] = test_y0(adv)

        # store results
        for test, f in failed_i.items():
            failed.loc[var, test] = "FAIL" if f else "pass"

        for test, e in err_i.items():
            err.loc[var, test] = e

    return failed, err
예제 #13
0
def test_budget(tend,
                forcing,
                avg_dims_error=None,
                thresh=0.9993,
                loc=None,
                iloc=None,
                plot=True,
                **plot_kws):
    """
    Test closure of budget: tend = forcing.

    Only the budget methods "native" and "cartesian" are tested; a method
    that is not contained in tend.ID is skipped.
    The test fails if the Nash-Sutcliffe efficiency coefficient (NSE)
    is below the given threshold. If avg_dims_error is given, the averaging in the
    NSE calculation is only carried out over these dimensions. Afterwards the minimum NSE
    value is taken over the remaining dimensions.

    Parameters
    ----------
    tend : xarray DataArray
        Total tendency.
    forcing : xarray DataArray
        Total forcing.
    avg_dims_error : str or list of str, optional
        Dimensions over which to calculate the NSE. The default is None.
    thresh : float, optional
        Threshold value for NSE below which the test fails
    loc : dict, optional
        Mapping for label based indexing before running the test. The default is None.
    iloc : dict, optional
        Mapping for integer-location based indexing before running the test. The default is None.
    plot : bool, optional
        Create scatter plot if test fails. The default is True.
    **plot_kws :
        keyword arguments passed to plotting.scatter_hue.

    Returns
    -------
    failed : bool
        Test failed.
    err : float
        Test statistic NSE (minimum over all tested budget methods)

    Raises
    ------
    ValueError
        If neither "native" nor "cartesian" is available in tend.ID,
        i.e. nothing could be tested.

    """
    failed = False
    err = []
    for ID in ["native", "cartesian"]:
        if ID not in tend.ID:
            continue
        ref = tend.sel(ID=ID, drop=True)
        dat = forcing.sel(ID=ID, drop=True)
        dat = tools.loc_data(dat, loc=loc, iloc=iloc)
        ref = tools.loc_data(ref, loc=loc, iloc=iloc)
        e = tools.nse(dat, ref, dim=avg_dims_error).min().values
        err.append(e)
        if e < thresh:
            log = "test_budget for ID='{}': min. NSE less than {}: {:.5f}\n".format(
                ID, thresh, e)
            print(log)
            if plot:
                dat.name = dat.description[:2] + "forcing"
                ref.name = ref.description
                plotting.scatter_hue(dat, ref, title=log, **plot_kws)
            failed = True

    if not err:
        # min() of an empty list would raise an uninformative ValueError
        raise ValueError(
            "test_budget: neither 'native' nor 'cartesian' is available in tend.ID")

    return failed, min(err)