示例#1
0
def calc_all_year(flow_matrix, general_params=def_gen_params):
    """Compute per-year mean, standard deviation and coefficient of variation.

    Every column of ``flow_matrix`` is treated as one water year. A year whose
    NaN count exceeds the ``max_nan_allowed_per_year`` parameter gets ``None``
    for all three metrics.

    Args:
        flow_matrix: 2-D numpy array indexed as ``[row, column]``.
        general_params: user overrides, merged with ``def_gen_params`` by
            ``set_user_params``.

    Returns:
        Tuple ``(average_annual_flows, standard_deviations,
        coefficient_variations)``; each is a list with one entry per column.
    """
    params = set_user_params(general_params, def_gen_params)
    nan_limit = params['max_nan_allowed_per_year']

    average_annual_flows = []
    standard_deviations = []
    coefficient_variations = []

    for year in range(len(flow_matrix[0])):
        yearly_flow = flow_matrix[:, year]

        if np.isnan(yearly_flow).sum() > nan_limit:
            # Too many gaps in this water year: report no metrics for it
            year_mean = None
            year_std = None
            year_cv = None
        else:
            year_mean = np.nanmean(yearly_flow)
            year_std = np.nanstd(yearly_flow)
            year_cv = year_std / year_mean

        average_annual_flows.append(year_mean)
        standard_deviations.append(year_std)
        coefficient_variations.append(year_cv)

    return average_annual_flows, standard_deviations, coefficient_variations
示例#2
0
def calc_fall_flush_timings_durations(flow_matrix, summer_timings, class_number, fall_params=def_fall_params):
    """Detect the fall flush peak and the return to the wet season per water year.

    Each column of ``flow_matrix`` is processed as one water year (presumably
    one row per day -- confirm against the caller). For every column the
    function smooths the flow with several Gaussian filters, asks
    ``return_to_wet_date`` for the wet-season start, and then walks the peaks
    found by ``peakdet`` to pick the first one that qualifies as a fall flush.

    Args:
        flow_matrix: 2-D numpy array indexed as ``[row, column]``.
        summer_timings: sequence indexed by column; entry ``column_number - 1``
            is used as the start row of the previous year's baseflow window.
        class_number: stream class identifier; selects the sigma used for the
            wet-season and slope-detection filters.
        fall_params: user overrides, merged with ``def_fall_params`` by
            ``set_user_params``.

    Returns:
        Tuple ``(start_dates, mags, wet_dates, durations)``. Each is a list with
        one entry per column; an entry stays ``None`` when the year is skipped
        or no value is found.
    """

    params = set_user_params(fall_params, def_fall_params)

    # NOTE: positional unpacking -- relies on the key order of the params dict
    # matching the order of the names below.
    max_zero_allowed_per_year, max_nan_allowed_per_year, min_flow_rate, sigma, broad_sigma, wet_season_sigma, peak_sensitivity, peak_sensitivity_wet, max_flush_duration, min_flush_percentage, wet_threshold_perc, peak_detect_perc, flush_threshold_perc, min_flush_threshold, date_cutoff, slope_sensitivity = params.values()

    start_dates = []
    wet_dates = []
    durations = []
    mags = []

    for column_number, _ in enumerate(flow_matrix[0]):

        # Pre-fill this year's slot with None; later code writes via [-1]
        start_dates.append(None)
        wet_dates.append(None)
        durations.append(None)
        mags.append(None)

        """Check to see if water year has more than allowed nan or zeros"""
        # Also skips years whose maximum flow is below min_flow_rate
        if np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year or max(flow_matrix[:, column_number]) < min_flow_rate:
            continue

        """Get flow data"""
        flow_data = flow_matrix[:, column_number]
        x_axis = list(range(len(flow_data)))

        """Interpolate between None values"""
        flow_data = replace_nan(flow_data)

        """Return to Wet Season"""
        # Classes 3-8 use fixed sigmas (6 for the wet-season filter, 1 for
        # slope detection); classes 1, 2 and 9 use sigma 7 for slope detection;
        # any other class falls back to wet_season_sigma / 4.
        if class_number == 3 or class_number == 4 or class_number == 5 or class_number == 6 or class_number == 7 or class_number == 8:
            wet_season_filter_data = gaussian_filter1d(flow_data, 6)
        else:
            wet_season_filter_data = gaussian_filter1d(flow_data, wet_season_sigma)
        broad_filter_data = gaussian_filter1d(flow_data, broad_sigma)
        if class_number == 1 or class_number == 2 or class_number == 9:
            slope_detection_data = gaussian_filter1d(flow_data, 7)
        elif class_number == 3 or class_number == 4 or class_number == 5 or class_number == 6 or class_number == 7 or class_number == 8: 
            slope_detection_data = gaussian_filter1d(flow_data, 1)
        else:
            slope_detection_data = gaussian_filter1d(flow_data, 4)

        return_date = return_to_wet_date(flow_data, wet_season_filter_data, broad_filter_data, slope_detection_data, 
                                         wet_threshold_perc, peak_detect_perc, peak_sensitivity_wet, column_number, slope_sensitivity)
        if return_date:
            wet_dates[-1] = return_date 
        # NOTE(review): recomputes the same broad_filter_data as above with
        # identical inputs.
        broad_filter_data = gaussian_filter1d(flow_data, broad_sigma)

        """Filter noise data with small sigma to find fall flush hump"""
        filter_data = gaussian_filter1d(flow_data, sigma)

        """Fit spline"""
        x_axis = list(range(len(filter_data)))
        spl = ip.UnivariateSpline(x_axis, filter_data, k=3, s=3)

        """Find the peaks and valleys of the filtered data"""
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(spl(x_axis), mean_flow * peak_sensitivity)

        """Find max and min of filtered flow data"""
        # The first 20 rows are excluded from the maximum search
        max_flow = max(filter_data[20:])
        max_flow_index = find_index(filter_data[20:], max_flow) + 20

        min_flow = min(broad_filter_data[:max_flow_index])

        """If could not find any max and find"""
        # Also skips the year when the first valley lies after the maximum.
        # The year's start date, magnitude and duration then stay None.
        if not list(maxarray) or not list(minarray) or minarray[0][0] > max_flow_index:
            continue

        """Get flow magnitude threshold from previous summer's baseflow"""
        if column_number == 0:
            # No previous year: use this year's flow up to the wet-season date.
            # NOTE(review): if wet_dates[0] is still None the slice [:None]
            # covers the whole column.
            wet_date = wet_dates[0]
            baseflow = list(flow_matrix[:wet_date, column_number])
            # bs_mean = np.mean(baseflow)
            bs_med = np.nanpercentile(baseflow, 50)
        else:
            summer_date = summer_timings[column_number - 1]
            if wet_dates[column_number]:
                # Stop the baseflow window 20 rows before the wet-season date
                # when the date is later than row 20
                if wet_dates[column_number] > 20:
                    wet_date = wet_dates[column_number] - 20
                else:
                    wet_date = wet_dates[column_number]
                baseflow = list(flow_matrix[summer_date:, column_number - 1]) + list(flow_matrix[:wet_date, column_number])
                # bs_mean = np.mean(baseflow)
            else:
                baseflow = list(flow_matrix[summer_date:, column_number - 1])
            bs_med = np.nanpercentile(baseflow, 50)

        """Get fall flush peak"""
        counter = 0
        # Only test duration for first half of fall flush peak
        half_duration = int(max_flush_duration/2)
        if bs_med > 25:
            # if median baseflow is large (>25), magnitude threshold is 50% above median baseflow of previous summer
            min_flush_magnitude = bs_med * 1.5
        else:
            # otherwise magnitude threshold is 100% above median baseflow of previous summer
            min_flush_magnitude = bs_med * 2
        # Never let the magnitude threshold drop below min_flush_threshold
        if min_flush_magnitude < min_flush_threshold:
            min_flush_magnitude = min_flush_threshold
        # Walk the peaks in order; `counter` tracks the peak's position so the
        # matching entries of minarray/maxarray can be compared. Every accepting
        # branch also requires the peak to exceed the broad-filtered flow and
        # min_flush_magnitude, and to fall on or before date_cutoff.
        for flow_index in maxarray:

            if counter == 0:
                if flow_index[0] < half_duration and flow_index[0] != 0 and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                    """if index found is before the half duration allowed"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
                elif bool((flow_index[1] - spl(maxarray[counter][0] - half_duration)) / flow_index[1] > flush_threshold_perc or minarray[counter][0] - maxarray[counter][0] < half_duration) and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                    """If peak and valley is separted by half duration, or half duration to the left is less than 30% of its value"""
                    start_dates[-1] = int(flow_index[0])
                    mags[-1] = flow_index[1]
                    break
            elif counter == len(minarray):
                # No valley left to pair with this peak: stop searching before
                # minarray[counter] would go out of range
                start_dates[-1] = None
                mags[-1] = None
                break
            elif bool(minarray[counter][0] - maxarray[counter][0] < half_duration or maxarray[counter][0] - minarray[counter-1][0] < half_duration) and bool(flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff):
                """valley and peak are distanced by less than half dur from either side"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            elif (spl(flow_index[0] - half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc and (spl(flow_index[0] + half_duration) - min_flow) / (flow_index[1] - min_flow) < flush_threshold_perc and flow_index[1] > broad_filter_data[int(flow_index[0])] and flow_index[1] > min_flush_magnitude and flow_index[0] <= date_cutoff:
                """both side of flow value at the peak + half duration index fall below flush_threshold_perc"""
                start_dates[-1] = int(flow_index[0])
                mags[-1] = flow_index[1]
                break
            counter = counter + 1

        """Check to see if last start_date falls behind the max_allowed_date"""
        # A flush found after the wet-season start is discarded.
        # NOTE(review): the trailing `and wet_dates[-1]` repeats the outer check.
        if wet_dates[-1]:
            if bool(start_dates[-1] is None or start_dates[-1] > wet_dates[-1]) and wet_dates[-1]:
                start_dates[-1] = None
                mags[-1] = None

        """Get duration of each fall flush"""
        # start_dates[-1] may be None here; it is passed to the helper as-is
        current_duration, left, right = calc_fall_flush_durations_2(
            filter_data, start_dates[-1])
        durations[-1] = current_duration
        # _plotter(x_axis, flow_data, filter_data, broad_filter_data, start_dates, wet_dates, column_number, left, right, maxarray, minarray, min_flush_magnitude, slope_detection_data)
        
    return start_dates, mags, wet_dates, durations
示例#3
0
def calc_spring_transition_timing_magnitude(flow_matrix,
                                            class_number,
                                            summer_timings,
                                            spring_params=def_spring_params):
    """Find the spring transition timing and magnitude for each water year.

    Each column of ``flow_matrix`` is processed as one water year. The flow is
    smoothed, the rightmost qualifying peak before ``max_peak_flow_date`` is
    located, and a spline fitted to a window around that peak is searched from
    right to left for the point where the recession starts.

    Args:
        flow_matrix: 2-D numpy array indexed as ``[row, column]``.
        class_number: stream class identifier; classes 4, 6, 7 and 8 override
            the smoothing sigma, peak thresholds and latest peak date.
        summer_timings: sequence indexed by column; a spring timing later than
            the matching summer timing is discarded.
        spring_params: user overrides, merged with ``def_spring_params`` by
            ``set_user_params``.

    Returns:
        Tuple ``(timings, magnitudes)``; each is a list with one entry per
        column. An entry is ``None`` when the year is skipped; a timing is also
        ``None`` when it falls after the summer timing.
    """
    params = set_user_params(spring_params, def_spring_params)

    # NOTE: positional unpacking -- relies on the key order of the params dict
    # matching the order of the names below.
    max_zero_allowed_per_year, max_nan_allowed_per_year, max_peak_flow_date, search_window_left, search_window_right, peak_sensitivity, peak_filter_percentage, min_max_flow_rate, window_sigma, fit_sigma, sensitivity, min_percentage_of_max_flow, lag_time, timing_cutoff, min_flow_rate = params.values()

    # Class-specific smoothing sigma and relative peak magnitude requirements.
    # These depend only on class_number, so they are applied once up front.
    if class_number == 4:
        window_sigma = 2.5
        peak_filter_percentage = .10
        min_percentage_of_max_flow = .05
        max_peak_flow_date = 255
    elif class_number == 6:
        window_sigma = 2.5
        peak_filter_percentage = .12
        min_percentage_of_max_flow = 0.12
        max_peak_flow_date = 255
    elif class_number == 7:
        window_sigma = 2
        peak_filter_percentage = 0.05
        min_percentage_of_max_flow = 0.05
        max_peak_flow_date = 255
    elif class_number == 8:
        window_sigma = 2.5
        peak_filter_percentage = .15
        min_percentage_of_max_flow = .15
        max_peak_flow_date = 255

    current_sensitivity = sensitivity / 1000

    timings = []
    magnitudes = []

    for column_number, _ in enumerate(flow_matrix[0]):

        timings.append(None)
        magnitudes.append(None)

        # Skip years with too many NaNs or zeros, or whose peak is too small
        if (np.isnan(flow_matrix[:, column_number]).sum() > max_nan_allowed_per_year
                or np.count_nonzero(flow_matrix[:, column_number] == 0) > max_zero_allowed_per_year
                or max(flow_matrix[:, column_number]) < min_flow_rate):
            continue

        # Get flow data and interpolate between missing values
        flow_data = flow_matrix[:, column_number]
        flow_data = replace_nan(flow_data)
        # Extract for use in optional plotting
        x_axis = list(range(len(flow_data)))

        # Gaussian filter with a heavy sigma to smooth the curve
        filter_data = gaussian_filter1d(flow_data, window_sigma)

        # Find the peaks and valleys of the filtered data; each entry holds the
        # index and the flow magnitude of a peak / valley
        mean_flow = np.nanmean(filter_data)
        maxarray, minarray = peakdet(filter_data, mean_flow * peak_sensitivity)

        # Find the max flow in the curve and determine flow range requirements
        max_flow = np.nanmax(filter_data)
        max_flow_index = find_index(filter_data, max_flow)
        min_flow = np.nanmin(filter_data)
        flow_range = max_flow - min_flow

        # Identify the rightmost peak that fulfils date and magnitude
        # requirements. `counter` is read after the loop, so give it the value
        # it has when the loop runs to completion; this keeps it defined (and
        # not stale from the previous year) when maxarray is empty.
        counter = len(maxarray) - 1
        for counter, flow_index in enumerate(reversed(maxarray)):
            if int(flow_index[0]) < max_peak_flow_date and (
                    flow_index[1] - min_flow) / flow_range > peak_filter_percentage:
                max_flow_index = int(flow_index[0])
                break

        if max_flow < min_max_flow_rate:
            # Annual max flow is below the threshold: seasonal timings are hard
            # to find for such low flows, so use the raw annual maximum.
            max_filter_data = np.nanmax(flow_data)
            timings[-1] = find_index(flow_data, max_filter_data)
            magnitudes[-1] = max_filter_data
        else:
            # A peak closer to the start than search_window_left gets no left
            # window at all; the right window is clipped at row 366.
            if max_flow_index < search_window_left:
                current_search_window_left = 0
            else:
                current_search_window_left = search_window_left
            if max_flow_index > 366 - search_window_right:
                current_search_window_right = 366 - max_flow_index
            else:
                current_search_window_right = search_window_right

            window_start = max_flow_index - current_search_window_left
            window_end = max_flow_index + current_search_window_right

            # Default result: the raw maximum inside the search window
            max_flow_index_window = max(flow_data[window_start:window_end])
            timings[-1] = find_index(flow_data, max_flow_index_window)
            magnitudes[-1] = max_flow_index_window

            # Gaussian filter again on the windowed data (smaller sigma)
            x_axis_window = list(range(window_start, window_end))
            flow_data_window = gaussian_filter1d(
                flow_data[window_start:window_end], fit_sigma)

            # Window too small for a spline fit: unless the selected peak was
            # the last candidate examined, keep the default result above and
            # move on to the next water year.
            if len(flow_data_window) < 50:
                if counter != len(maxarray) - 1:
                    continue

            # Fit a spline on top of the Gaussian curve and take its derivative
            spl = ip.UnivariateSpline(x_axis_window,
                                      flow_data_window,
                                      k=3,
                                      s=3)
            spl_first_deriv = spl.derivative(1)

            # Find where the derivative crosses zero, offset to year indices
            index_zeros = crossings_nonzero_all(spl_first_deriv(x_axis_window))
            new_index = []
            for index in index_zeros:
                new_index.append(window_start + index)

            # Loop through the indices where derivative=0, from right to left
            for i in reversed(new_index):
                threshold = max(spl_first_deriv(x_axis_window))
                max_flow_window = max(spl(x_axis_window))
                min_flow_window = min(spl(x_axis_window))
                range_window = max_flow_window - min_flow_window
                # Spring timing: before the summer timing, after the cutoff,
                # with an increasingly steep rise over the four preceding steps
                # and a flow that is high enough within the window range.
                if (summer_timings[column_number] is not None
                        and i < summer_timings[column_number]
                        and i > timing_cutoff
                        and spl(i) - spl(i - 1) > threshold * current_sensitivity * 1
                        and spl(i - 1) - spl(i - 2) > threshold * current_sensitivity * 2
                        and spl(i - 2) - spl(i - 3) > threshold * current_sensitivity * 3
                        and spl(i - 3) - spl(i - 4) > threshold * current_sensitivity * 4
                        and (spl(i) - min_flow_window) / range_window > min_percentage_of_max_flow):
                    timings[-1] = i
                    break

            # A timing before the selected peak is replaced by peak + lag
            if timings[-1] < max_flow_index:
                timings[-1] = max_flow_index + lag_time

            # Find max flow 4 days before and 7 days ahead; assign as new start
            # date. Only done when the full 11-row window lies inside the data.
            if len(flow_data[timings[-1] - 4:timings[-1] + 7]) > 10:
                max_flow_window_new = max(flow_data[timings[-1] -
                                                    4:timings[-1] + 7])
                new_timings = find_index(
                    flow_data[timings[-1] - 4:timings[-1] + 7],
                    max_flow_window_new)
                timings[-1] = timings[-1] - 4 + new_timings + lag_time
                magnitudes[-1] = max_flow_window_new

            # Spring cannot start after summer; the magnitude is kept as-is
            if summer_timings[column_number] is not None and timings[
                    -1] > summer_timings[column_number]:
                timings[-1] = None

            #_spring_transition_plotter(x_axis, flow_data, filter_data, x_axis_window, spl_first_deriv, new_index, max_flow_index, timings, current_search_window_left, current_search_window_right, spl, column_number, maxarray)

    return timings, magnitudes
示例#4
0
def calc_winter_highflow_annual(matrix, exceedance_percent, winter_params=def_winter_params):
    """Compute timing, duration and frequency of high-flow events per water year.

    Nine flow thresholds are derived from the data: five from percentiles of
    the annual peak flows and four from percentiles of the whole matrix. For
    every column (water year) the rows are scanned and consecutive rows at or
    above a threshold are grouped into one ``FlowExceedance`` event.

    Args:
        matrix: 2-D numpy array indexed as ``[row, column]``.
        exceedance_percent: not used by this function; kept so existing callers
            keep working.
        winter_params: user overrides, merged with ``def_winter_params`` by
            ``set_user_params``.

    Returns:
        Tuple ``(timing, duration, freq, magnitude)`` of dicts keyed by
        ``2, 5, 10, 20, 50`` (peak-flow thresholds) and ``12, 15, 110, 120``
        (high-flow thresholds for percentiles 2, 5, 10, 20). Each value is a
        list with one entry per column; entries are ``None`` for skipped years,
        and frequency/duration are ``None`` for years without events.
    """
    params = set_user_params(winter_params, def_winter_params)

    # NOTE: positional unpacking -- relies on the key order of the params dict
    max_zero_allowed_per_year, max_nan_allowed_per_year = params.values()

    peak_percentiles = [2, 5, 10, 20, 50]  # for peak flow metrics
    high_percentiles = [2, 5, 10, 20]  # for high flow metrics
    # Output keys, in the same order as the thresholds built below
    result_keys = (2, 5, 10, 20, 50, 12, 15, 110, 120)

    column_count = len(matrix[0])

    # Peak thresholds come from the distribution of each year's peak flow
    peak_flows = [np.nanmax(matrix[:, column_number])
                  for column_number in range(column_count)]
    peak_exceedance_values = [np.nanpercentile(peak_flows, 100 - percentile)
                              for percentile in peak_percentiles]

    # High-flow thresholds come from the distribution of all flow values
    highflow_exceedance_values = [np.nanpercentile(matrix, 100 - percentile)
                                  for percentile in high_percentiles]

    # Five peak exceedance values plus four high flow exceedance values
    exceedance_values = peak_exceedance_values + highflow_exceedance_values
    threshold_ids = range(len(exceedance_values))

    freq = {i: [] for i in threshold_ids}
    duration = {i: [] for i in threshold_ids}
    timing = {i: [] for i in threshold_ids}
    magnitude = {i: [] for i in threshold_ids}

    for column_number in range(column_count):
        flow_column = matrix[:, column_number]

        # Years with too many NaNs or zeros get None for every metric
        if (np.isnan(flow_column).sum() > max_nan_allowed_per_year
                or np.count_nonzero(flow_column == 0) > max_zero_allowed_per_year):
            for i in threshold_ids:
                freq[i].append(None)
                duration[i].append(None)
                timing[i].append(None)
                magnitude[i].append(None)
            continue

        exceedance_object = {i: [] for i in threshold_ids}    # all events of the year
        exceedance_duration = {i: [] for i in threshold_ids}  # durations of closed events
        current_flow_object = {i: None for i in threshold_ids}  # event still open, if any

        last_row = len(flow_column) - 1

        # Loop through each flow value of the year and check it against every
        # exceedance threshold.
        # NOTE(review): an event that is still open on the last row is closed
        # without counting that row, and an event that starts on the last row
        # is never closed, so its duration is not recorded. Left as-is to keep
        # the metrics unchanged -- confirm whether this is intended.
        for row_number, flow_row in enumerate(flow_column):
            for i, threshold in enumerate(exceedance_values):
                event = current_flow_object[i]
                if event and (flow_row < threshold or row_number == last_row):
                    # End of an event: flow fell below threshold, or end of column
                    event.end_date = row_number + 1
                    event.get_max_magnitude()
                    exceedance_duration[i].append(event.duration)
                    current_flow_object[i] = None
                elif flow_row >= threshold:
                    if not event:
                        # Beginning of an event
                        event = FlowExceedance(row_number, None, 1, i)
                        exceedance_object[i].append(event)
                        current_flow_object[i] = event
                        event.add_flow(flow_row)
                    else:
                        # Continuing an event
                        event.add_flow(flow_row)
                        event.duration = event.duration + 1

        for i, threshold in enumerate(exceedance_values):
            event_count = len(exceedance_object[i])
            freq[i].append(None if event_count == 0 else event_count)

            total_duration = np.nansum(exceedance_duration[i])
            if total_duration == 0 or np.isnan(total_duration):
                duration[i].append(None)
            else:
                duration[i].append(total_duration)

            timing[i].append(median_of_time(exceedance_object[i]))
            magnitude[i].append(threshold)

    _timing = {key: timing[i] for i, key in enumerate(result_keys)}
    _duration = {key: duration[i] for i, key in enumerate(result_keys)}
    _freq = {key: freq[i] for i, key in enumerate(result_keys)}
    _magnitude = {key: magnitude[i] for i, key in enumerate(result_keys)}
    return _timing, _duration, _freq, _magnitude
def calc_start_of_summer(matrix,
                         class_number,
                         summer_params=def_summer_params):
    """Detect the start-of-summer (baseflow) date for each water year.

    Each column of ``matrix`` is processed as one water year. The flow is
    smoothed and fitted with a spline; the start of summer is the first index
    after the last major peak where the flow is below a magnitude threshold
    and the spline's rate of change is small.

    Args:
        matrix: 2-D numpy array indexed as ``[row, column]``.
        class_number: stream class identifier; classes 4, 6, 7 and 8 use fixed
            sensitivity, peak sensitivity and sigma values.
        summer_params: user overrides, merged with ``def_summer_params`` by
            ``set_user_params``.

    Returns:
        List with one entry per column: the detected index, or ``None`` when
        the year is skipped or no index fulfils the criteria. Because every
        year except the last is extended with rows from the following year,
        an index may lie beyond the end of its own column.
    """
    # Set adjustable parameters for start of summer date detection.
    # NOTE: positional unpacking -- relies on the key order of the params dict.
    params = set_user_params(summer_params, def_summer_params)

    max_zero_allowed_per_year, max_nan_allowed_per_year, sigma, sensitivity, peak_sensitivity, max_peak_flow_date, min_summer_flow_percent, min_flow_rate = params.values()

    # Set specific parameters for rain-dominated classes. The previous test
    # `class_number == 4 or 6 or 7 or 8` was always true, so these overrides
    # were applied to every class.
    if class_number in (4, 6, 7, 8):
        sensitivity = 1100
        peak_sensitivity = .1
        sigma = 4

    current_sensitivity = 1 / sensitivity
    last_column = len(matrix[0]) - 1

    start_dates = []
    for column_number in range(len(matrix[0])):
        start_dates.append(None)

        # Check if data has too many zeros or NaN, or too small a peak, and if
        # so skip to the next water year
        if (pd.isnull(matrix[:, column_number]).sum() > max_nan_allowed_per_year
                or np.count_nonzero(matrix[:, column_number] == 0) > max_zero_allowed_per_year
                or max(matrix[:, column_number]) < min_flow_rate):
            continue

        # Extend each column with the first 100 rows of the next column,
        # except for the last column
        if column_number != last_column:
            flow_data = list(matrix[:, column_number]) + \
                list(matrix[:100, column_number + 1])
        else:
            flow_data = matrix[:, column_number]

        # Fill in any NaNs
        flow_data = replace_nan(flow_data)

        # Smooth out the timeseries
        smooth_data = gaussian_filter1d(flow_data, sigma)
        x_axis = list(range(len(smooth_data)))

        # Fit a spline to the smoothed timeseries and take its derivative
        spl = ip.UnivariateSpline(x_axis, smooth_data, k=3, s=3)
        spl_first = spl.derivative(1)

        # Only the first 366 rows are searched for the annual maximum
        max_flow_data = max(smooth_data[:366])
        max_flow_index = find_index(smooth_data, max_flow_data)

        # Find the major peaks of the filtered data
        mean_flow = np.nanmean(flow_data)
        maxarray, minarray = peakdet(smooth_data, mean_flow * peak_sensitivity)

        # Set search range after the last smoothed peak before the cutoff date
        for flow_index in reversed(maxarray):
            if int(flow_index[0]) < max_peak_flow_date:
                max_flow_index = int(flow_index[0])
                break

        # Set a magnitude threshold below which start of summer can begin
        min_flow_data = min(smooth_data[max_flow_index:366])
        threshold = min_flow_data + \
            (smooth_data[max_flow_index] - min_flow_data) * \
            min_summer_flow_percent

        rate_limit = max_flow_data * current_sensitivity
        for index, data in enumerate(smooth_data):
            # The last two smoothed values are never considered
            if index == len(smooth_data) - 2:
                break
            # Search criteria: date is after the last major peak, flow is below
            # the magnitude threshold, and the derivative is under the rate of
            # change threshold
            if index > max_flow_index and data < threshold and abs(
                    spl_first(index)) < rate_limit:
                start_dates[-1] = index
                break

        # _summer_baseflow_plot(x_axis, column_number, flow_data, spl, spl_first, start_dates, threshold, max_flow_index, maxarray)

    return start_dates