def calc_all_year(flow_matrix, general_params=def_gen_params):
    """Compute annual flow statistics for every water-year column.

    For each column of *flow_matrix* (one water year per column), compute
    the annual mean flow, standard deviation, and coefficient of variation
    (std / mean). A column whose NaN count exceeds the configured
    'max_nan_allowed_per_year' threshold yields None in all three lists.

    Returns a 3-tuple of parallel lists:
    (average_annual_flows, standard_deviations, coefficient_variations).
    """
    params = set_user_params(general_params, def_gen_params)
    nan_limit = params['max_nan_allowed_per_year']

    annual_means = []
    annual_stds = []
    annual_cvs = []
    for year in range(len(flow_matrix[0])):
        column = flow_matrix[:, year]
        # Too many missing days in this water year: record None for all
        # three metrics and move on.
        if np.isnan(column).sum() > nan_limit:
            annual_means.append(None)
            annual_stds.append(None)
            annual_cvs.append(None)
            continue
        yearly_mean = np.nanmean(column)
        yearly_std = np.nanstd(column)
        annual_means.append(yearly_mean)
        annual_stds.append(yearly_std)
        annual_cvs.append(yearly_std / yearly_mean)
    return annual_means, annual_stds, annual_cvs
def calc_fall_flush_timings_durations(flow_matrix, summer_timings, class_number, fall_params=def_fall_params):
    """Detect the fall-flush event in each water-year column of *flow_matrix*.

    For every column (water year) this finds:
      - start_dates: day-of-year index where the fall flush peak begins (or None)
      - mags: flow magnitude at that peak (or None)
      - wet_dates: day-of-year index of the return to the wet season (or None)
      - durations: duration of the flush event from calc_fall_flush_durations_2

    *summer_timings* supplies the previous year's start-of-summer index, used
    to extract the preceding summer baseflow. *class_number* selects
    class-specific smoothing sigmas. Returns the four parallel lists.
    """
    params = set_user_params(fall_params, def_fall_params)
    # NOTE(review): this unpack relies on def_fall_params preserving insertion
    # order and exactly 16 entries — confirm against the params definition.
    max_zero_allowed_per_year, max_nan_allowed_per_year, min_flow_rate, sigma, broad_sigma, wet_season_sigma, peak_sensitivity, peak_sensitivity_wet, max_flush_duration, min_flush_percentage, wet_threshold_perc, peak_detect_perc, flush_threshold_perc, min_flush_threshold, date_cutoff, slope_sensitivity = params.values()
    start_dates = []
    wet_dates = []
    durations = []
    mags = []
    for column_number, _ in enumerate(flow_matrix[0]):
        # Placeholders for this water year; overwritten below if detection succeeds.
        start_dates.append(None)
        wet_dates.append(None)
        durations.append(None)
        mags.append(None)
        """Check to see if water year has more than allowed nan or zeros"""
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year or max(flow_matrix[:, column_number]) < min_flow_rate:
            continue
        """Get flow data"""
        flow_data = flow_matrix[:, column_number]
        x_axis = list(range(len(flow_data)))
        """Interpolate between None values"""
        flow_data = replace_nan(flow_data)
        """Return to Wet Season"""
        # Classes 3-8 use a fixed wet-season smoothing sigma of 6; other
        # classes use the configurable wet_season_sigma.
        if class_number == 3 or class_number == 4 or class_number == 5 or class_number == 6 or class_number == 7 or class_number == 8:
            wet_season_filter_data = gaussian_filter1d(flow_data, 6)
        else:
            wet_season_filter_data = gaussian_filter1d(flow_data, wet_season_sigma)
        broad_filter_data = gaussian_filter1d(flow_data, broad_sigma)
        # Class-specific sigma for the slope-detection trace fed into
        # return_to_wet_date.
        if class_number == 1 or class_number == 2 or class_number == 9:
            slope_detection_data = gaussian_filter1d(flow_data, 7)
        elif class_number == 3 or class_number == 4 or class_number == 5 or class_number == 6 or class_number == 7 or class_number == 8:
            slope_detection_data = gaussian_filter1d(flow_data, 1)
        else:
            slope_detection_data = gaussian_filter1d(flow_data, 4)
        return_date = return_to_wet_date(flow_data, wet_season_filter_data, broad_filter_data, slope_detection_data, wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number, slope_sensitivity)
        if return_date:
            wet_dates[-1] = return_date
        broad_filter_data = gaussian_filter1d(flow_data, broad_sigma)
        """Filter noise data with small sigma to find fall flush hump"""
        filter_data = gaussian_filter1d(flow_data, sigma)
        """Fit spline"""
        x_axis = list(range(len(filter_data)))
        spl = ip.UnivariateSpline(x_axis, filter_data, k=3, s=3)
        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        # peakdet returns (maxima, minima) arrays of [index, magnitude] pairs.
        maxarray, minarray = peakdet(spl(x_axis), mean_flow * peak_sensitivity)
        """Find max and min of filtered flow data"""
        # The first 20 days are excluded when locating the annual max —
        # presumably to skip carryover from the prior year; confirm.
        max_flow = max(filter_data[20:])
        max_flow_index = find_index(filter_data[20:], max_flow) + 20
        min_flow = min(broad_filter_data[:max_flow_index])
        """If could not find any max and find"""
        if not list(maxarray) or not list(minarray) or minarray[0][0] > max_flow_index:
            continue
        """Get flow magnitude threshold from previous summer's baseflow"""
        if column_number == 0:
            # First year: no previous summer available, use flows up to the
            # wet-season return date of this year only.
            wet_date = wet_dates[0]
            baseflow = list(flow_matrix[:wet_date, column_number])
            # bs_mean = np.mean(baseflow)
            bs_med = np.nanpercentile(baseflow, 50)
        else:
            summer_date = summer_timings[column_number - 1]
            if wet_dates[column_number]:
                # Stop 20 days before the wet-season return (when possible)
                # so the flush itself is not counted as baseflow.
                if wet_dates[column_number] > 20:
                    wet_date = wet_dates[column_number] - 20
                else:
                    wet_date = wet_dates[column_number]
                baseflow = list(flow_matrix[summer_date:, column_number - 1]) + list(flow_matrix[:wet_date, column_number])
                # bs_mean = np.mean(baseflow)
            else:
                baseflow = list(flow_matrix[summer_date:, column_number - 1])
            bs_med = np.nanpercentile(baseflow, 50)
        """Get fall flush peak"""
        counter = 0
        # Only test duration for first half of fall flush peak
        half_duration = int(max_flush_duration/2)
        if bs_med > 25:
            # if median baseflow is large (>25), magnitude threshold is 50% above median baseflow of previous summer
            min_flush_magnitude = bs_med * 1.5
        else:
            # otherwise magnitude threshold is 100% above median baseflow of previous summer
            min_flush_magnitude = bs_med * 2
        if min_flush_magnitude < min_flush_threshold:
            min_flush_magnitude = min_flush_threshold
        # Scan detected peaks left-to-right; `counter` indexes into minarray
        # in lockstep so each peak can be compared with its neighboring valleys.
        for flow_index in maxarray:
            if counter == 0:
                if flow_index[0] < half_duration and flow_index[0] != 0 and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                    """if index found is before the half duration allowed"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
                elif bool((flow_index[1] - spl(maxarray[counter][0] - half_duration)) / flow_index[1] > flush_threshold_perc or minarray[counter][0] - maxarray[counter][0] < half_duration) and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                    """If peak and valley is separted by half duration, or half duration to the left is less than 30% of its value"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
            elif counter == len(minarray):
                # Ran out of valleys to pair with peaks: give up for this year.
                start_dates[-1] = None
                mags[-1] = None
                break
            elif bool(minarray[counter][0] - maxarray[counter][0] < half_duration or maxarray[counter][0] - minarray[counter-1][0] < half_duration) and bool(flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff):
                """valley and peak are distanced by less than half dur from either side"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            elif (spl(flow_index[0] - half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc and (spl(flow_index[0] + half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                """both side of flow value at the peak + half duration index fall below flush_threshold_perc"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            counter = counter + 1
        """Check to see if last start_date falls behind the max_allowed_date"""
        # A flush that starts after the wet-season return date is rejected.
        if wet_dates[-1]:
            if bool(start_dates[-1] is None or start_dates[-1] > wet_dates[-1]) and wet_dates[-1]:
                start_dates[-1] = None
                mags[-1] = None
        """Get duration of each fall flush"""
        current_duration, left, right = calc_fall_flush_durations_2(filter_data, start_dates[-1])
        durations[-1] = current_duration
        # _plotter(x_axis, flow_data, filter_data, broad_filter_data, start_dates, wet_dates, column_number, left, right, maxarray, minarray, min_flush_magnitude, slope_detection_data)
    return start_dates, mags, wet_dates, durations
def calc_spring_transition_timing_magnitude(flow_matrix, class_number, summer_timings, spring_params=def_spring_params):
    """Locate the spring recession transition for each water-year column.

    For every column of *flow_matrix* this returns the day-of-year index
    (`timings`) and flow magnitude (`magnitudes`) where the spring
    transition begins, or None where the year is skipped or no valid
    timing is found. *class_number* overrides several detection parameters
    for rain-dominated classes (4, 6, 7, 8); *summer_timings* is used to
    reject timings that land after the start of summer.
    """
    params = set_user_params(spring_params, def_spring_params)
    # NOTE(review): positional unpack assumes def_spring_params has exactly
    # these 15 keys in this order — confirm against the params definition.
    max_zero_allowed_per_year, max_nan_allowed_per_year, max_peak_flow_date, search_window_left, search_window_right, peak_sensitivity, peak_filter_percentage, min_max_flow_rate, window_sigma, fit_sigma, sensitivity, min_percentage_of_max_flow, lag_time, timing_cutoff, min_flow_rate = params.values()
    timings = []
    magnitudes = []
    for column_number, _ in enumerate(flow_matrix[0]):
        # Placeholders; overwritten below when detection succeeds.
        timings.append(None)
        magnitudes.append(None)
        """Check to see if water year has more than allowed nan or zeros"""
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year or max(flow_matrix[:, column_number]) < min_flow_rate:
            continue
        """Get flow data and interpolate between None values"""
        flow_data = flow_matrix[:, column_number]
        flow_data = replace_nan(flow_data)
        # Extract for use in optional plotting
        x_axis = list(range(len(flow_data)))
        current_sensitivity = sensitivity / 1000
        if class_number == 4:
            """Use specialized smoothing sigma values for rain-dominated classes"""
            window_sigma = 2.5
            """Use specialized relative peak magnitude requirements for rain-dominated classes"""
            peak_filter_percentage = .10
            min_percentage_of_max_flow = .05
            """Reduce sensitivity in rain-dominated gages"""
            max_peak_flow_date = 255
        if class_number == 6:
            window_sigma = 2.5
            peak_filter_percentage = .12
            min_percentage_of_max_flow = 0.12
            max_peak_flow_date = 255
        if class_number == 7:
            window_sigma = 2
            peak_filter_percentage = 0.05
            min_percentage_of_max_flow = 0.05
            max_peak_flow_date = 255
        if class_number == 8:
            window_sigma = 2.5
            peak_filter_percentage = .15
            min_percentage_of_max_flow = .15
            max_peak_flow_date = 255
        """Using Gaussian with heavy sigma to smooth the curve"""
        filter_data = gaussian_filter1d(flow_data, window_sigma)
        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        # Returns array with the index and flow magnitude for each peak and valley
        maxarray, minarray = peakdet(filter_data, mean_flow * peak_sensitivity)
        """Find the max flow in the curve and determine flow range requirements"""
        max_flow = np.nanmax(filter_data)
        max_flow_index = find_index(filter_data, max_flow)
        min_flow = np.nanmin(filter_data)
        flow_range = max_flow - min_flow
        """Identify rightmost peak that fulfills date and magnitude requirements"""
        # NOTE(review): if maxarray is empty this loop never runs and `counter`
        # used further below is unbound (NameError) — confirm whether peakdet
        # can return an empty maxima list here.
        for counter, flow_index in enumerate(reversed(maxarray)):
            if int(flow_index[0]) < max_peak_flow_date and (flow_index[1] - min_flow) / flow_range > peak_filter_percentage:
                max_flow_index = int(flow_index[0])
                break
        if np.nanmax(filter_data) < min_max_flow_rate:
            """Set start of spring index to the max flow index, when the annual max flow is below certain threshold. This is used for extremely low flows where seasonal timings are harder to find """
            max_filter_data = np.nanmax(flow_data)
            timings[-1] = find_index(flow_data, max_filter_data)
            magnitudes[-1] = max_filter_data
        else:
            # Clamp the search window so it stays within the year's data.
            if max_flow_index < search_window_left:
                current_search_window_left = 0
            else:
                current_search_window_left = search_window_left
            if max_flow_index > 366 - search_window_right:
                current_search_window_right = 366 - max_flow_index
            else:
                current_search_window_right = search_window_right
            """Get indices of windowed data"""
            max_flow_index_window = max(flow_data[max_flow_index - current_search_window_left:max_flow_index + current_search_window_right])
            timings[-1] = find_index(flow_data, max_flow_index_window)
            magnitudes[-1] = max_flow_index_window
            """Gaussian filter again on the windowed data (smaller filter this time)"""
            x_axis_window = list(range(max_flow_index - current_search_window_left, max_flow_index + current_search_window_right))
            flow_data_window = gaussian_filter1d(flow_data[max_flow_index - current_search_window_left:max_flow_index + current_search_window_right], fit_sigma)
            """If search window is too small, move on to next value in maxarray. If it is the last value in maxarray, proceed inside loop"""
            if len(flow_data_window) < 50:
                if counter != len(maxarray) - 1:
                    continue
            """Fit a spline on top of the Gaussian curve"""
            spl = ip.UnivariateSpline(x_axis_window, flow_data_window, k=3, s=3)
            """Calculate the first derivative of the spline"""
            spl_first_deriv = spl.derivative(1)
            """Find where the derivative of the spline crosses zero"""
            index_zeros = crossings_nonzero_all(spl_first_deriv(x_axis_window))
            """Offset the new index"""
            # Zero-crossings are relative to the window; shift back to
            # whole-year day indices.
            new_index = []
            for index in index_zeros:
                new_index.append(max_flow_index - current_search_window_left + index)
            """Loop through the indices where derivative=0, from right to left"""
            for i in reversed(new_index):
                # These four values are loop-invariant; recomputed each pass.
                threshold = max(spl_first_deriv(x_axis_window))
                max_flow_window = max(spl(x_axis_window))
                min_flow_window = min(spl(x_axis_window))
                range_window = max_flow_window - min_flow_window
                """Set spring timing as index which fulfills the following requirements"""
                # Candidate must precede start of summer, follow the timing
                # cutoff, show a steepening rise over the previous four days,
                # and sit high enough within the windowed flow range.
                if summer_timings[column_number] is not None and i < summer_timings[column_number] and i > timing_cutoff and spl(i) - spl(i - 1) > threshold * current_sensitivity * 1 and spl(i - 1) - spl(i - 2) > threshold * current_sensitivity * 2 and spl(i - 2) - spl(i - 3) > threshold * current_sensitivity * 3 and spl(i - 3) - spl(i - 4) > threshold * current_sensitivity * 4 and (spl(i) - min_flow_window) / range_window > min_percentage_of_max_flow:
                    timings[-1] = i
                    break
            """Check if timings is before max flow index"""
            if timings[-1] < max_flow_index:
                # replace max flow index with cutoff date
                timings[-1] = max_flow_index + lag_time
            """Find max flow 4 days before and 7 days ahead. Assign as new start date"""
            if len(flow_data[timings[-1] - 4:timings[-1] + 7]) > 10:
                max_flow_window_new = max(flow_data[timings[-1] - 4:timings[-1] + 7])
                new_timings = find_index(flow_data[timings[-1] - 4:timings[-1] + 7], max_flow_window_new)
                timings[-1] = timings[-1] - 4 + new_timings + lag_time
                magnitudes[-1] = max_flow_window_new
        # Final sanity check: a spring timing after the start of summer is invalid.
        if summer_timings[column_number] is not None and timings[-1] > summer_timings[column_number]:
            timings[-1] = None
        #_spring_transition_plotter(x_axis, flow_data, filter_data, x_axis_window, spl_first_deriv, new_index, max_flow_index, timings, current_search_window_left, current_search_window_right, spl, column_number, maxarray)
    return timings, magnitudes
def calc_winter_highflow_annual(matrix, exceedance_percent, winter_params = def_winter_params):
    """Compute winter high-flow metrics for each water-year column of *matrix*.

    Builds nine exceedance thresholds — five from percentiles (2, 5, 10, 20,
    50) of the annual peak flows, plus four from percentiles (2, 5, 10, 20)
    of the full flow record — then, for each water year and threshold, tracks
    contiguous runs of days above the threshold as FlowExceedance objects.

    Returns four dicts keyed by percentile label, each mapping to a per-year
    list: (_timing, _duration, _freq, _magnitude).

    NOTE(review): the *exceedance_percent* argument is never used in this
    body — confirm whether callers rely on it or it is vestigial.
    """
    params = set_user_params(winter_params, def_winter_params)
    max_zero_allowed_per_year, max_nan_allowed_per_year = params.values()
    """Get peak percentiles calculated from each year's peak flow values"""
    peak_flows = []
    peak_percentiles = [2,5,10,20,50] # for peak flow metrics
    high_percentiles = [2,5,10,20] # for high flow metrics
    peak_exceedance_values = []
    highflow_exceedance_values = []
    # One annual maximum per water-year column.
    for column_number, _ in enumerate(matrix[0]):
        flow_data = matrix[:, column_number]
        peak_flows.append(np.nanmax(flow_data))
    for percentile in peak_percentiles:
        peak_exceedance_values.append(np.nanpercentile(peak_flows, 100 - percentile))
    """Add high flow percentiles and peak flow exceedance vals together for final list of exceedance values"""
    # NOTE(review): redundant re-initialization — highflow_exceedance_values
    # was already set to [] above.
    highflow_exceedance_values = []
    for i in high_percentiles:
        highflow_exceedance_values.append(np.nanpercentile(matrix, 100 - i))
    exceedance_values = peak_exceedance_values + highflow_exceedance_values # five peak exceedance vals plus four high flow exceedance vals
    # Per-threshold accumulators, keyed by index into exceedance_values.
    exceedance_value = {}
    freq = {}
    duration = {}
    timing = {}
    magnitude = {}
    # NOTE(review): peak_magnitude (and per-year peak_flow below) is populated
    # but never returned — appears to be dead code; confirm before removing.
    peak_magnitude = {}
    for i, value in enumerate(exceedance_values):
        exceedance_value[i] = value
        freq[i] = []
        duration[i] = []
        timing[i] = []
        magnitude[i] = []
        peak_magnitude[i] = []
    for column_number, flow_column in enumerate(matrix[0]):
        # Years with too many NaNs or zeros get None for every metric.
        if np.isnan(matrix[:, column_number]).sum() > max_nan_allowed_per_year or np.count_nonzero(matrix[:, column_number] == 0) > max_zero_allowed_per_year:
            for i, value in enumerate(exceedance_values):
                freq[i].append(None)
                duration[i].append(None)
                timing[i].append(None)
                magnitude[i].append(None)
                peak_magnitude[i].append(None)
            continue
        exceedance_object = {}
        exceedance_duration = {}
        current_flow_object = {}
        peak_flow = {}
        """Init current flow object"""
        for i, value in enumerate(exceedance_values):
            exceedance_object[i] = []
            exceedance_duration[i] = []
            current_flow_object[i] = None
            peak_flow[i] = []
        """Loop through each flow value for the year to check if they pass exceedance threshold"""
        for row_number, flow_row in enumerate(matrix[:, column_number]):
            for i, value in enumerate(exceedance_values):
                if bool(flow_row < exceedance_value[i] and current_flow_object[i]) or bool(row_number == len(matrix[:, column_number]) - 1 and current_flow_object[i]):
                    """End of an object if it falls below threshold, or end of column"""
                    current_flow_object[i].end_date = row_number + 1
                    current_flow_object[i].get_max_magnitude()
                    exceedance_duration[i].append(current_flow_object[i].duration)
                    peak_flow[i] = np.nanmax(flow_row)
                    current_flow_object[i] = None
                elif flow_row >= exceedance_value[i]:
                    if not current_flow_object[i]:
                        """Beginning of an object"""
                        exceedance_object[i].append(FlowExceedance(row_number, None, 1, i))
                        current_flow_object[i] = exceedance_object[i][-1]
                        current_flow_object[i].add_flow(flow_row)
                    else:
                        """Continuing an object"""
                        current_flow_object[i].add_flow(flow_row)
                        current_flow_object[i].duration = current_flow_object[i].duration + 1
        # Summarize this year's events for each threshold.
        for i, value in enumerate(exceedance_values):
            freq[i].append(None if len(exceedance_object[i])==0 else len(exceedance_object[i]))
            duration[i].append(None if np.nansum(exceedance_duration[i])==0 else np.nansum(exceedance_duration[i]) if not np.isnan(np.nansum(exceedance_duration[i])) else None)
            timing[i].append(median_of_time(exceedance_object[i]))
            magnitude[i].append(exceedance_value[i])
    # Re-key by percentile label. NOTE(review): keys 12, 15, 110, 120 appear
    # to encode the high-flow 2/5/10/20 percentiles with a leading '1' to
    # distinguish them from the peak-flow keys — confirm with callers.
    _timing = {2: timing[0], 5: timing[1], 10: timing[2], 20: timing[3], 50: timing[4], 12: timing[5], 15: timing[6], 110: timing[7], 120: timing[8],}
    _duration = {2: duration[0], 5: duration[1], 10: duration[2], 20: duration[3], 50: duration[4], 12: duration[5], 15: duration[6], 110: duration[7], 120: duration[8],}
    _freq = {2: freq[0], 5: freq[1], 10: freq[2], 20: freq[3], 50: freq[4], 12: freq[5], 15: freq[6], 110: freq[7], 120: freq[8],}
    _magnitude = {2: magnitude[0], 5: magnitude[1], 10: magnitude[2], 20: magnitude[3], 50: magnitude[4], 12: magnitude[5], 15: magnitude[6], 110: magnitude[7], 120: magnitude[8],}
    return _timing, _duration, _freq, _magnitude
def calc_start_of_summer(matrix, class_number, summer_params=def_summer_params):
    """Find the start-of-summer date for each water-year column of *matrix*.

    The start of summer is the first day after the last major peak where the
    smoothed hydrograph flattens (small spline derivative) and drops below a
    magnitude threshold derived from the post-peak minimum. Returns a list
    with one day-of-year index (or None) per column.

    *class_number* switches rain-dominated classes (4, 6, 7, 8) to less
    sensitive detection parameters.
    """
    """Set adjustable parameters for start of summer date detection"""
    params = set_user_params(summer_params, def_summer_params)
    # NOTE(review): positional unpack assumes def_summer_params has exactly
    # these 8 keys in this order — confirm against the params definition.
    max_zero_allowed_per_year, max_nan_allowed_per_year, sigma, sensitivity, peak_sensitivity, max_peak_flow_date, min_summer_flow_percent, min_flow_rate = params.values(
    )
    start_dates = []
    for column_number, flow_data in enumerate(matrix[0]):
        start_dates.append(None)
        """Check if data has too many zeros or NaN, and if so skip to next water year"""
        if pd.isnull(matrix[:, column_number]).sum(
        ) > max_nan_allowed_per_year or np.count_nonzero(
                matrix[:, column_number] == 0) > max_zero_allowed_per_year or max(
                    matrix[:, column_number]) < min_flow_rate:
            continue
        """Append each column with 100 more days from next column, except the last column"""
        if column_number != len(matrix[0]) - 1:
            flow_data = list(matrix[:, column_number]) + \
                list(matrix[:100, column_number+1])
        else:
            flow_data = matrix[:, column_number]
        """Replace any NaNs with previous day's flow"""
        flow_data = replace_nan(flow_data)
        """Set specific parameters for rain-dominated classes"""
        # BUGFIX: was `if class_number == 4 or 6 or 7 or 8:`, which is always
        # truthy (non-zero int literals), so the rain-dominated overrides were
        # applied to every class. Match the membership test the sibling
        # functions use for classes 4, 6, 7, 8.
        if class_number == 4 or class_number == 6 or class_number == 7 or class_number == 8:
            sensitivity = 1100
            peak_sensitivity = .1
            sigma = 4
        """Smooth out the timeseries"""
        smooth_data = gaussian_filter1d(flow_data, sigma)
        x_axis = list(range(len(smooth_data)))
        """Find spline fit equation for smoothed timeseries, and find derivative of spline"""
        spl = ip.UnivariateSpline(x_axis, smooth_data, k=3, s=3)
        spl_first = spl.derivative(1)
        max_flow_data = max(smooth_data[:366])
        max_flow_index = find_index(smooth_data, max_flow_data)
        """Find the major peaks of the filtered data"""
        mean_flow = np.nanmean(flow_data)
        maxarray, minarray = peakdet(smooth_data, mean_flow * peak_sensitivity)
        """Set search range after last smoothed peak flow"""
        # Walk peaks right-to-left; use the rightmost one before the cutoff date.
        for flow_index in reversed(maxarray):
            if int(flow_index[0]) < max_peak_flow_date:
                max_flow_index = int(flow_index[0])
                break
        """Set a magnitude threshold below which start of summer can begin"""
        min_flow_data = min(smooth_data[max_flow_index:366])
        threshold = min_flow_data + \
            (smooth_data[max_flow_index] - min_flow_data) * \
            min_summer_flow_percent
        current_sensitivity = 1 / sensitivity
        start_dates[-1] = None
        for index, data in enumerate(smooth_data):
            # Stop before the end so spl_first(index) stays inside the fit range.
            if index == len(smooth_data) - 2:
                break
            """Search criteria: derivative is under rate of change threshold, date is after last major peak, and flow is less than specified percent of smoothed max flow"""
            if abs(
                    spl_first(index)
            ) < max_flow_data * current_sensitivity and index > max_flow_index and data < threshold:
                start_dates[-1] = index
                break
        # _summer_baseflow_plot(x_axis, column_number, flow_data, spl, spl_first, start_dates, threshold, max_flow_index, maxarray)
    return start_dates