def rsq(obs, sim=None, res=None, missing="drop", weighted=False, max_gap=30,
        nparam=None):
    """Compute R-squared, possibly adjusted for the number of free parameters.

    Parameters
    ----------
    obs: pandas.Series
        Series with the observed values. Always required: the total sum of
        squares is computed from the observations, also when res is given.
    sim: pandas.Series
        Series with the simulated values. Only used when res is None, in
        which case the residuals are computed as ``sim - obs``.
    res: pandas.Series
        Series with the residual values. If provided, sim is ignored.
    missing: str, optional
        string with the rule to deal with missing values. Only "drop" is
        supported now.
    weighted: bool, optional
        If weighted is True, the variances are computed using the time
        step between observations as weights. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced by the
        max_gap value. Default value is 30 days.
    nparam: int, optional
        number of calibrated parameters. When None (or 0) the unadjusted
        R-squared is returned.

    Returns
    -------
    float
        The (adjusted) R-squared, or nan (with a logged warning) when no
        residuals remain after dropping missing values.

    Notes
    -----
    .. math:: \\rho_{adj} = 1- \\frac{n-1}{n-n_{param}}*\\frac{rss}{tss}

    Where n is the number of observations used in the computation,
    :math:`n_{param}` the number of free parameters, rss the (weighted) sum
    of the squared residuals, and tss the (weighted) total sum of squares of
    the observations around their weighted mean.

    When nparam is provided, the :math:`\\rho` is adjusted for the number of
    calibration parameters.

    """
    if res is None:
        res = sim - obs

    if missing == "drop":
        res = res.dropna()

    # Return nan if the time indices of the sim and obs don't match
    if res.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    # Dropping missing residuals can leave fewer residuals than observations
    # (NaNs in obs, or observations at times without a simulated value). The
    # weights below have one entry per residual, so restrict the observations
    # to the residual time steps; otherwise the weighted mean fails on a
    # shape mismatch and n would count observations that are not used.
    if obs.index.size != res.index.size:
        obs = obs.reindex(res.index)

    w = _get_weights(res, weighted=weighted, max_gap=max_gap)
    obs_values = obs.to_numpy()

    mu = average(obs_values, weights=w)
    rss = (w * res.to_numpy() ** 2.0).sum()
    tss = (w * (obs_values - mu) ** 2.0).sum()

    if nparam:
        n = obs.size
        return 1.0 - (n - 1.0) / (n - nparam) * rss / tss
    else:
        return 1.0 - rss / tss
def pearsonr(obs, sim, missing="drop", weighted=False, max_gap=30):
    """Compute the (weighted) Pearson correlation (r).

    Parameters
    ----------
    sim: pandas.Series
        Series with the simulated values.
    obs: pandas.Series
        Series with the observed values.
    missing: str, optional
        string with the rule to deal with missing values in the
        observed series. Only "drop" is supported now.
    weighted: bool, optional
        Weight the values by the normalized time step to account for
        irregular time series. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced by the
        max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The (weighted) Pearson correlation, or nan (with a logged warning)
        when none of the observation time steps has a simulated value.

    Notes
    -----
    The Pearson correlation (r) is computed as follows:

    .. math:: r = \\frac{\\sum_{i=1}^{N}w_i (x_i - \\bar{x})(y_i - \\bar{y})}
        {\\sqrt{\\sum_{i=1}^{N} w_i(x_i-\\bar{x})^2 \\sum_{i=1}^{N}
        w_i(y_i-\\bar{y})^2}}

    Where :math:`x` is the observed time series, :math:`y` the simulated
    time series, and :math:`N` the number of time steps of the observed
    time series for which a simulated value is available.

    """
    if missing == "drop":
        obs = obs.dropna()

    # Put the simulation on the observation time steps; steps without a
    # simulated value become NaN and are flagged in the mask.
    sim = sim.reindex(obs.index)
    has_sim = sim.notna().to_numpy()

    # Return nan if the time indices of the sim and obs don't match
    if not has_sim.any():
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    # Keep only the time steps present in both series *before* computing the
    # weights, so that w, sim and obs always have the same length. Previously
    # the weights were derived from all observations, which raised a shape
    # mismatch as soon as the simulation covered only part of them.
    sim = sim.to_numpy()[has_sim]
    obs = obs[has_sim]

    w = _get_weights(obs, weighted=weighted, max_gap=max_gap)

    # Center both series around their weighted means.
    sim = sim - average(sim, weights=w)
    obs = obs.to_numpy() - average(obs.to_numpy(), weights=w)

    r = (w * sim * obs).sum() / sqrt(
        (w * sim ** 2).sum() * (w * obs ** 2).sum()
    )

    return r
def nse(obs, sim=None, res=None, missing="drop", weighted=False, max_gap=30):
    """Compute the (weighted) Nash-Sutcliffe Efficiency (NSE).

    Parameters
    ----------
    obs: pandas.Series
        Series with the observed values. Always required: the denominator
        is computed from the observations, also when res is given.
    sim: pandas.Series
        Series with the simulated values. Only used when res is None, in
        which case the residuals are computed as ``sim - obs``.
    res: pandas.Series
        Series with the residual values. If provided, sim is ignored.
    missing: str, optional
        string with the rule to deal with missing values. Only "drop" is
        supported now.
    weighted: bool, optional
        If weighted is True, the variances are computed using the time
        step between observations as weights. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced by the
        max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The (weighted) NSE, or nan (with a logged warning) when no residuals
        remain after dropping missing values.

    Notes
    -----
    .. math:: \\text{NSE} = 1 - \\frac{\\sum w_i(h_s-h_o)^2}
        {\\sum w_i(h_o-\\mu_{h,o})^2}

    References
    ----------
    .. [nash_1970] Nash, J. E., & Sutcliffe, J. V. (1970). River flow
       forecasting through conceptual models part I-A discussion of
       principles. Journal of hydrology, 10(3), 282-290.

    """
    if res is None:
        res = sim - obs

    if missing == "drop":
        res = res.dropna()

    # Return nan if the time indices of the sim and obs don't match
    if res.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    # The weights have one entry per remaining residual. When residuals were
    # dropped (NaNs in obs, or observations without a simulated value), the
    # observations must be restricted to the same time steps; otherwise the
    # weighted mean below fails on a shape mismatch.
    if obs.index.size != res.index.size:
        obs = obs.reindex(res.index)

    w = _get_weights(res, weighted=weighted, max_gap=max_gap)
    obs_values = obs.to_numpy()
    mu = average(obs_values, weights=w)

    rss = (w * res.to_numpy() ** 2).sum()
    tss = (w * (obs_values - mu) ** 2).sum()

    return 1 - rss / tss
def mae(obs=None, sim=None, res=None, missing="drop", weighted=False,
        max_gap=30):
    """Compute the (weighted) Mean Absolute Error (MAE).

    Parameters
    ----------
    sim: pandas.Series
        Series with the simulated values. Only used when res is None.
    obs: pandas.Series
        Series with the observed values. Only used when res is None.
    res: pandas.Series
        Series with the residual values. If provided, the sim and obs
        arguments are ignored; otherwise the residuals are ``sim - obs``.
    missing: str, optional
        string with the rule to deal with missing values. Only "drop" is
        supported now.
    weighted: bool, optional
        Weight the values by the time step to account for irregular time
        series. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced by the
        max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The weighted sum of the absolute residuals, or nan (with a logged
        warning) when no residuals remain after dropping missing values.

    Notes
    -----
    The Mean Absolute Error (MAE) between two time series x and y is
    computed as follows:

    .. math:: \\text{MAE} = \\sum_{i=1}^{N} w_i |x_i - y_i|

    where :math:`N` is the number of residuals and :math:`w_i` the weights.

    """
    residuals = sim - obs if res is None else res

    if missing == "drop":
        residuals = residuals.dropna()

    # Nothing left to evaluate: the sim and obs do not share time indices.
    if residuals.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    weights = _get_weights(residuals, weighted=weighted, max_gap=max_gap)
    absolute_errors = abs(residuals.to_numpy())

    return (weights * absolute_errors).sum()
def rmse(obs=None, sim=None, res=None, missing="drop", weighted=False,
         max_gap=30):
    """Compute the (weighted) Root Mean Squared Error (RMSE).

    Parameters
    ----------
    sim: pandas.Series
        Series with the simulated values. Only used when res is None.
    obs: pandas.Series
        Series with the observed values. Only used when res is None.
    res: pandas.Series
        Series with the residual values. If provided, the sim and obs
        arguments are ignored; otherwise the residuals are ``sim - obs``.
    missing: str, optional
        string with the rule to deal with missing values. Only "drop" is
        supported now.
    weighted: bool, optional
        Weight the values by the time step to account for irregular time
        series. Default is False.
    max_gap: int, optional
        maximum allowed gap period in days to use for the computation of the
        weights. All time steps larger than max_gap are replaced by the
        max_gap value. Default value is 30 days.

    Returns
    -------
    float
        The square root of the weighted sum of squared residuals, or nan
        (with a logged warning) when no residuals remain after dropping
        missing values.

    Notes
    -----
    Computes the Root Mean Squared Error (RMSE) as follows:

    .. math:: \\text{RMSE} = \\sqrt{\\sum_{i=1}^{N} w_i n_i^2}

    where :math:`N` is the number of residuals :math:`n` and :math:`w_i`
    the weights.

    """
    residuals = sim - obs if res is None else res

    if missing == "drop":
        residuals = residuals.dropna()

    # Nothing left to evaluate: the sim and obs do not share time indices.
    if residuals.index.size == 0:
        logger.warning("Time indices of the sim and obs don't match.")
        return nan

    weights = _get_weights(residuals, weighted=weighted, max_gap=max_gap)
    weighted_squares = weights * residuals.to_numpy() ** 2

    return sqrt(weighted_squares.sum())