def beta_kge(obs: DataArray, sim: DataArray) -> float:
    """Calculate the beta term of the Kling-Gupta Efficiency.

    The term is the mean of the simulations divided by the mean of the observations, both evaluated on the
    series returned by ``_mask_valid``.

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.

    Returns
    -------
    float
        Mean of `sim` divided by the mean of `obs`.
    """
    # check the inputs before any computation is done
    _validate_inputs(obs, sim)

    # keep only the time steps with valid observations
    obs, sim = _mask_valid(obs, sim)

    mean_sim = sim.mean()
    mean_obs = obs.mean()
    return float(mean_sim / mean_obs)
def kge(obs: DataArray, sim: DataArray, weights: List[float] = [1., 1., 1.]) -> float:
    r"""Calculate the Kling-Gupta Efficiency [#]_

    .. math::
        \text{KGE} = 1 - \sqrt{s_r (r - 1)^2 + s_\alpha (\alpha - 1)^2 + s_\beta (\beta_{\text{KGE}} - 1)^2},

    where :math:`r` is the correlation coefficient, :math:`\alpha` the :math:`\alpha`-NSE decomposition (ratio of
    the standard deviations), :math:`\beta_{\text{KGE}}` the fraction of the means and
    :math:`s_r, s_\alpha, s_\beta` the corresponding weights (here the three float values in the `weights`
    parameter). Each weight multiplies the squared deviation of its term; with the default weights of 1 this
    is the unweighted KGE.

    NOTE(review): the cited paper is commonly written with the scaling factors inside the squared terms, i.e.
    :math:`[s_r (r - 1)]^2`. The two forms only agree for weights of 0 or 1 -- confirm which one is intended
    before relying on non-default weights.

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.
    weights : List[float]
        Weighting factors of the 3 KGE parts, by default each part has a weight of 1. The list is only read,
        never modified.

    Returns
    -------
    float
        Kling-Gupta Efficiency, or NaN if fewer than two time steps remain after masking.

    Raises
    ------
    ValueError
        If `weights` does not contain exactly three values.

    References
    ----------
    .. [#] Gupta, H. V., Kling, H., Yilmaz, K. K., & Martinez, G. F. (2009). Decomposition of the mean squared error
        and NSE performance criteria: Implications for improving hydrological modelling. Journal of hydrology, 377(1-2),
        80-91.
    """
    if len(weights) != 3:
        raise ValueError("Weights of the KGE must be a list of three values")

    # verify inputs
    _validate_inputs(obs, sim)

    # get time series with only valid observations
    obs, sim = _mask_valid(obs, sim)

    # the correlation coefficient needs at least two samples
    if len(obs) < 2:
        return np.nan

    r, _ = stats.pearsonr(obs.values, sim.values)

    alpha = sim.std() / obs.std()
    beta = sim.mean() / obs.mean()

    # weighted sum of the squared distances of the three components from their optimum (1)
    value = (weights[0] * (r - 1)**2 + weights[1] * (alpha - 1)**2 + weights[2] * (beta - 1)**2)

    return 1 - np.sqrt(float(value))
def runoff_ratio(da: DataArray, prcp: DataArray) -> float:
    """Calculate the runoff ratio.

    The runoff ratio is the mean of the discharge divided by the mean of the precipitation. Before averaging,
    the precipitation is cut to the window spanned by the first and last entry of the ``"date"`` coordinate
    of `da`.

    Parameters
    ----------
    da : DataArray
        Array of flow values. Must have a coordinate named ``"date"``.
    prcp : DataArray
        Array of precipitation values. Its first coordinate is used for the selection.

    Returns
    -------
    float
        Runoff ratio.
    """
    # the precipitation is addressed through its first coordinate, whatever that one is called
    prcp_coord = list(prcp.coords.keys())[0]

    # time window covered by the discharge
    discharge_window = slice(da.coords["date"][0], da.coords["date"][-1])
    prcp_in_window = prcp.sel({prcp_coord: discharge_window})

    return float(da.mean() / prcp_in_window.mean())
def low_q_dur(da: DataArray, threshold: float = 0.2) -> float:
    """Calculate low-flow duration.

    Average duration of low-flow events (number of consecutive steps <`threshold` times the mean flow) [#]_,
    [#]_ (Table 2).

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    threshold : float, optional
        Low-flow threshold. Values below ``threshold * mean`` are considered low flows, where ``mean`` is the
        mean of `da`.

    Returns
    -------
    float
        Low-flow duration, or NaN if no value lies below the low-flow threshold.

    References
    ----------
    .. [#] Olden, J. D. and Poff, N. L.: Redundancy and the choice of hydrologic indices for characterizing streamflow
        regimes. River Research and Applications, 2003, 19, 101--121, doi:10.1002/rra.700
    .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures.
        Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015
    """
    mean_flow = float(da.mean())

    # positions of all steps below the low-flow threshold
    idx = np.where(da.values < threshold * mean_flow)[0]
    if len(idx) == 0:
        return np.nan

    # presumably groups the positions into runs of consecutive steps -- verify against _split_list
    periods = _split_list(idx)

    # builtin float, as promised by the return annotation
    return float(np.mean([len(p) for p in periods]))
def low_q_freq(da: DataArray, coord: str = "date", threshold: float = 0.2) -> float:
    """Calculate low-flow frequency.

    Mean number of time steps per year with a flow below `threshold` times the mean flow of the entire
    period. Years are calendar-year windows, beginning with the first January 1st found in the data. A window
    is only evaluated while its end lies strictly before the last time stamp of `da`.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    coord : str, optional
        Name of the datetime coordinate of `da`.
    threshold : float, optional
        Low-flow threshold. Values below ``threshold * mean`` are considered low flows.

    Returns
    -------
    float
        Low-flow frequency. ``np.mean`` of an empty list (NaN) if no yearly window is evaluated.
    """
    first_date = da.coords[coord][0].values.astype("datetime64[s]").astype(datetime)
    last_date = da.coords[coord][-1].values.astype("datetime64[s]").astype(datetime)

    # determine the date of the first January 1st in the data period
    if first_date == datetime.strptime(f"{first_date.year}-01-01", "%Y-%m-%d"):
        start_date = first_date
    else:
        start_date = datetime.strptime(f"{first_date.year + 1}-01-01", "%Y-%m-%d")

    # end of the first full year. The window closes one second before the next year starts, so that
    # time stamps later than 00:00 on December 31st (sub-daily data) are still part of the slice.
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # determine the mean flow over the entire period
    mean_flow = da.mean(skipna=True)

    lqfs = []
    while end_date < last_date:
        data = da.sel({coord: slice(start_date, end_date)})

        # number of steps with discharge lower than threshold * mean in a one year period
        n_steps = (data < (threshold * mean_flow)).sum()
        lqfs.append(float(n_steps))

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.mean(lqfs)
def low_q_dur(da: DataArray, threshold: float = 0.2) -> float:
    """Calculate low-flow duration.

    Average length of the groups that ``_split_list`` forms from the positions of all values below
    ``threshold`` times the mean of `da`.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    threshold : float, optional
        Low-flow threshold. Values below ``threshold * mean`` are considered low flows.

    Returns
    -------
    float
        Low-flow duration, or NaN if no value lies below the low-flow threshold.
    """
    flow_limit = threshold * float(da.mean())
    low_flow_steps = np.where(da.values < flow_limit)[0]

    # without a single low-flow step there is no event to measure
    if len(low_flow_steps) == 0:
        return np.nan

    event_lengths = [len(event) for event in _split_list(low_flow_steps)]
    return np.mean(event_lengths)
def stream_elas(da: DataArray, prcp: DataArray, coord: str = 'date') -> float:
    """Calculate stream elasticity.

    For every yearly window, beginning with the first October 1st found in the data, the value

    ``(q_year - q_total) / (p_year - p_total) * (p_total / q_total)``

    is computed, where ``q_year``/``p_year`` are the mean discharge/precipitation of the window and
    ``q_total``/``p_total`` the means over all valid time steps. The median of these yearly values is
    returned. A window is only evaluated while its end lies strictly before the last time stamp of `da`.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values. Its first coordinate is renamed to `coord`.
    coord : str, optional
        Name of the datetime coordinate of `da`.

    Returns
    -------
    float
        Stream elasticity.
    """
    # rename precip coordinate name (to avoid problems with 'index' or 'date')
    prcp = prcp.rename({list(prcp.coords.keys())[0]: coord})

    # slice prcp to the same time window as the discharge
    prcp = prcp.sel({coord: slice(da.coords[coord][0], da.coords[coord][-1])})

    # determine the date of the first October 1st in the data period
    first_date = da.coords[coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[coord][-1].values.astype('datetime64[s]').astype(datetime)
    if first_date > datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d'):
        start_date = datetime.strptime(f'{first_date.year + 1}-10-01', '%Y-%m-%d')
    else:
        start_date = datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d')

    # The window closes one second before the next October 1st, so that time stamps later than 00:00 on
    # September 30th (sub-daily data) are still part of the slice.
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # mask only valid time steps (only discharge has missing values)
    idx = (da >= 0) & (~da.isnull())
    da = da[idx]
    prcp = prcp[idx]

    # calculate long-term means
    q_mean_total = da.mean()
    p_mean_total = prcp.mean()

    values = []
    while end_date < last_date:
        q = da.sel({coord: slice(start_date, end_date)})
        p = prcp.sel({coord: slice(start_date, end_date)})

        val = (q.mean() - q_mean_total) / (p.mean() - p_mean_total) * (p_mean_total / q_mean_total)
        values.append(val)

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.median([float(v) for v in values])
def runoff_ratio(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> float:
    """Calculate runoff ratio.

    Runoff ratio (ratio of mean discharge to mean precipitation) [#]_ (Eq. 2). The precipitation is cut to
    the period covered by the discharge before its mean is taken.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values. Its first coordinate is renamed to `datetime_coord`.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.

    Returns
    -------
    float
        Runoff ratio.

    References
    ----------
    .. [#] Sawicz, K., Wagener, T., Sivapalan, M., Troch, P. A., and Carrillo, G.: Catchment classification: empirical
        analysis of hydrologic similarity based on catchment function in the eastern USA.
        Hydrology and Earth System Sciences, 2011, 15, 2895--2911, doi:10.5194/hess-15-2895-2011
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # give the first precipitation coordinate the name of the discharge time coordinate
    # (to avoid problems with 'index' or 'date')
    first_prcp_coord = list(prcp.coords.keys())[0]
    prcp = prcp.rename({first_prcp_coord: datetime_coord})

    # restrict the precipitation to the period between the first and last discharge time stamp
    time_axis = da.coords[datetime_coord]
    prcp = prcp.sel({datetime_coord: slice(time_axis[0], time_axis[-1])})

    return float(da.mean() / prcp.mean())
def kge(obs: DataArray, sim: DataArray, weights: list = [1, 1, 1]) -> float:
    r"""Calculate the Kling-Gupta Efficiency.

    .. math::
        \text{KGE} = 1 - \sqrt{s_r (r - 1)^2 + s_\alpha (\alpha - 1)^2 + s_\beta (\beta - 1)^2},

    where :math:`r` is the Pearson correlation coefficient between `obs` and `sim`, :math:`\alpha` the ratio
    of the standard deviations (`sim` over `obs`), :math:`\beta` the ratio of the means (`sim` over `obs`)
    and :math:`s_r, s_\alpha, s_\beta` the three values of `weights`.

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.
    weights : list
        Weighting factors of the 3 KGE parts, by default each part has a weight of 1. The list is only read,
        never modified.

    Returns
    -------
    float
        Kling-Gupta Efficiency, or NaN if fewer than two time steps remain after masking.

    Raises
    ------
    ValueError
        If `weights` does not contain exactly three values.
    """
    if len(weights) != 3:
        raise ValueError("Weights of the KGE must be a list of three values")

    # verify inputs
    _validate_inputs(obs, sim)

    # get time series with only valid observations
    obs, sim = _mask_valid(obs, sim)

    # the correlation coefficient needs at least two samples; report "undefined" instead of failing
    if len(obs) < 2:
        return np.nan

    r, _ = stats.pearsonr(obs.values, sim.values)

    alpha = sim.std() / obs.std()
    beta = sim.mean() / obs.mean()

    # weighted sum of the squared distances of the three components from their optimum (1)
    value = (weights[0] * (r - 1)**2 + weights[1] * (alpha - 1)**2 + weights[2] * (beta - 1)**2)

    return 1 - np.sqrt(float(value))
def q_mean(da: DataArray) -> float:
    """Calculate mean discharge.

    Parameters
    ----------
    da : DataArray
        Array of flow values.

    Returns
    -------
    float
        Mean discharge, converted to a builtin float.
    """
    mean_discharge = da.mean()
    return float(mean_discharge)
def nse(obs: DataArray, sim: DataArray) -> float:
    """Calculate the Nash-Sutcliffe Efficiency.

    One minus the sum of squared differences between `sim` and `obs`, divided by the sum of squared
    deviations of `obs` from its mean.

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.

    Returns
    -------
    float
        Nash-Sutcliffe Efficiency.
    """
    # check the inputs before any computation is done
    _validate_inputs(obs, sim)

    # keep only the time steps with valid observations
    obs, sim = _mask_valid(obs, sim)

    obs_anomaly = obs - obs.mean()
    model_error = sim - obs

    variance_sum = (obs_anomaly**2).sum()
    error_sum = (model_error**2).sum()

    return float(1 - error_sum / variance_sum)
def beta_nse(obs: DataArray, sim: DataArray) -> float:
    r"""Calculate the beta NSE decomposition [#]_

    The beta NSE decomposition is the difference of the mean simulation and mean observation divided by the
    standard deviation of the observations.

    .. math:: \beta = \frac{\mu_s - \mu_o}{\sigma_o},

    where :math:`\mu_s` is the mean of the simulations (here, `sim`), :math:`\mu_o` is the mean of the
    observations (here, `obs`) and :math:`\sigma_o` the standard deviation of the observations.

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.

    Returns
    -------
    float
        Beta NSE decomposition.

    References
    ----------
    .. [#] Gupta, H. V., Kling, H., Yilmaz, K. K., & Martinez, G. F. (2009). Decomposition of the mean squared error
        and NSE performance criteria: Implications for improving hydrological modelling. Journal of hydrology, 377(1-2),
        80-91.
    """
    # check the inputs before any computation is done
    _validate_inputs(obs, sim)

    # keep only the time steps with valid observations
    obs, sim = _mask_valid(obs, sim)

    mean_difference = sim.mean() - obs.mean()
    return float(mean_difference / obs.std())
def beta_kge(obs: DataArray, sim: DataArray) -> float:
    r"""Calculate the beta KGE term [#]_

    The beta term of the Kling-Gupta Efficiency is defined as the fraction of the means.

    .. math:: \beta_{\text{KGE}} = \frac{\mu_s}{\mu_o},

    where :math:`\mu_s` is the mean of the simulations (here, `sim`) and :math:`\mu_o` is the mean of the
    observations (here, `obs`).

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.

    Returns
    -------
    float
        Beta KGE term.

    References
    ----------
    .. [#] Gupta, H. V., Kling, H., Yilmaz, K. K., & Martinez, G. F. (2009). Decomposition of the mean squared error
        and NSE performance criteria: Implications for improving hydrological modelling. Journal of hydrology, 377(1-2),
        80-91.
    """
    # check the inputs before any computation is done
    _validate_inputs(obs, sim)

    # keep only the time steps with valid observations
    obs, sim = _mask_valid(obs, sim)

    simulated_mean = sim.mean()
    observed_mean = obs.mean()
    return float(simulated_mean / observed_mean)
def nse(obs: DataArray, sim: DataArray) -> float:
    r"""Calculate Nash-Sutcliffe Efficiency [#]_

    Nash-Sutcliffe Efficiency is the R-square between observed and simulated discharge.

    .. math:: \text{NSE} = 1 - \frac{\sum_{t=1}^{T}(Q_m^t - Q_o^t)^2}{\sum_{t=1}^T(Q_o^t - \overline{Q}_o)^2},

    where :math:`Q_m` are the simulations (here, `sim`) and :math:`Q_o` are observations (here, `obs`).

    Parameters
    ----------
    obs : DataArray
        Observed time series.
    sim : DataArray
        Simulated time series.

    Returns
    -------
    float
        Nash-Sutcliffe Efficiency

    References
    ----------
    .. [#] Nash, J. E.; Sutcliffe, J. V. (1970). "River flow forecasting through conceptual models part I - A
        discussion of principles". Journal of Hydrology. 10 (3): 282-290. doi:10.1016/0022-1694(70)90255-6.
    """
    # check the inputs before any computation is done
    _validate_inputs(obs, sim)

    # keep only the time steps with valid observations
    obs, sim = _mask_valid(obs, sim)

    # spread of the observations around their own mean
    squared_deviation = (obs - obs.mean())**2
    # mismatch between simulation and observation
    squared_error = (sim - obs)**2

    score = 1 - squared_error.sum() / squared_deviation.sum()
    return float(score)
def q_mean(da: DataArray) -> float:
    """Calculate mean discharge.

    Parameters
    ----------
    da : DataArray
        Array of flow values.

    Returns
    -------
    float
        Mean of `da`, converted to a builtin float.
    """
    average = da.mean()
    return float(average)
def stream_elas(da: DataArray, prcp: DataArray, datetime_coord: str = None) -> float:
    """Calculate stream elasticity.

    Streamflow precipitation elasticity (sensitivity of streamflow to changes in precipitation at the annual
    time scale) [#]_.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    prcp : DataArray
        Array of precipitation values. Its first coordinate is renamed to `datetime_coord`.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.

    Returns
    -------
    float
        Stream elasticity (median of the yearly elasticity values).

    References
    ----------
    .. [#] Sankarasubramanian, A., Vogel, R. M., and Limbrunner, J. F.: Climate elasticity of streamflow in the
        United States. Water Resources Research, 2001, 37, 1771--1781, doi:10.1029/2000WR900330
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # give the first precipitation coordinate the name of the discharge time coordinate
    # (to avoid problems with 'index' or 'date')
    first_prcp_coord = list(prcp.coords.keys())[0]
    prcp = prcp.rename({first_prcp_coord: datetime_coord})

    # restrict the precipitation to the period between the first and last discharge time stamp
    discharge_period = slice(da.coords[datetime_coord][0], da.coords[datetime_coord][-1])
    prcp = prcp.sel({datetime_coord: discharge_period})

    first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime)

    # the yearly windows begin at the first October 1st that is not before the start of the data
    october_first = datetime.strptime(f'{first_date.year}-10-01', '%Y-%m-%d')
    if first_date > october_first:
        start_date = datetime.strptime(f'{first_date.year + 1}-10-01', '%Y-%m-%d')
    else:
        start_date = october_first
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # keep only valid time steps (only discharge has missing values)
    valid = (da >= 0) & (~da.isnull())
    da = da[valid]
    prcp = prcp[valid]

    # long-term means over all valid time steps
    q_mean_total = da.mean()
    p_mean_total = prcp.mean()

    yearly_values = []
    while end_date < last_date:
        window = slice(start_date, end_date)
        q = da.sel({datetime_coord: window})
        p = prcp.sel({datetime_coord: window})

        # relative change of discharge per relative change of precipitation in this window
        q_anomaly = q.mean() - q_mean_total
        p_anomaly = p.mean() - p_mean_total
        yearly_values.append(q_anomaly / p_anomaly * (p_mean_total / q_mean_total))

        # move on to the next yearly window
        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    return np.median([float(v) for v in yearly_values])
def low_q_freq(da: DataArray, datetime_coord: str = None, threshold: float = 0.2) -> float:
    """Calculate Low-flow frequency.

    Frequency of low-flow events (<`threshold` times the mean flow) [#]_, [#]_ (Table 2). The frequency is
    the mean number of low-flow steps per calendar-year window, beginning with the first January 1st found in
    the data. A window is only evaluated while its end lies strictly before the last time stamp of `da`.

    Parameters
    ----------
    da : DataArray
        Array of flow values.
    datetime_coord : str, optional
        Datetime coordinate in the passed DataArray. Tried to infer automatically if not specified.
    threshold : float, optional
        Low-flow threshold. Values below ``threshold * mean`` are considered low flows, where ``mean`` is the
        mean of `da` over the entire period.

    Returns
    -------
    float
        Low-flow frequency. NaN if no yearly window is evaluated.

    References
    ----------
    .. [#] Olden, J. D. and Poff, N. L.: Redundancy and the choice of hydrologic indices for characterizing streamflow
        regimes. River Research and Applications, 2003, 19, 101--121, doi:10.1002/rra.700
    .. [#] Westerberg, I. K. and McMillan, H. K.: Uncertainty in hydrological signatures.
        Hydrology and Earth System Sciences, 2015, 19, 3951--3968, doi:10.5194/hess-19-3951-2015
    """
    if datetime_coord is None:
        datetime_coord = utils.infer_datetime_coord(da)

    # determine the date of the first January 1st in the data period
    first_date = da.coords[datetime_coord][0].values.astype('datetime64[s]').astype(datetime)
    last_date = da.coords[datetime_coord][-1].values.astype('datetime64[s]').astype(datetime)

    if first_date == datetime.strptime(f'{first_date.year}-01-01', '%Y-%m-%d'):
        start_date = first_date
    else:
        start_date = datetime.strptime(f'{first_date.year + 1}-01-01', '%Y-%m-%d')

    # end date of the first full year period
    end_date = start_date + relativedelta(years=1) - relativedelta(seconds=1)

    # determine the mean flow over the entire period
    mean_flow = da.mean(skipna=True)

    lqfs = []
    while end_date < last_date:
        data = da.sel({datetime_coord: slice(start_date, end_date)})

        # number of steps with discharge lower than threshold * mean in a one year period
        n_steps = (data < (threshold * mean_flow)).sum()
        lqfs.append(float(n_steps))

        start_date += relativedelta(years=1)
        end_date += relativedelta(years=1)

    # builtin float, as promised by the return annotation
    return float(np.mean(lqfs))