示例#1
0
def analyzeSinglePoint(x, y):
    """Run the Mann-Kendall test on one series and pack the result in a dict.

    ``mkt.test`` is called with ``eps=1E-3``, ``alpha=0.05`` and
    ``Ha="upordown"``, and is expected to return five values here:
    ``(Zmk, MK, m, c, p)``.

    Parameters
    ----------
    x, y
        Forwarded unchanged as the first two positional arguments of
        ``mkt.test``.

    Returns
    -------
    dict
        ``'zmk'`` : first value returned by ``mkt.test``.
        ``'ha'``  : 0 when the MK result string starts with ``'rej'``,
        otherwise 1.
        ``'m'``   : slope of the linear fit to the data.
        ``'c'``   : intercept of the linear fit to the data.
        ``'p'``   : p-value of the Z-score statistic of the test.
    """
    significance = 0.05

    z_score, verdict, slope, intercept, p_value = mkt.test(
        x, y, eps=1E-3, alpha=significance, Ha="upordown")

    # `verdict` is the textual outcome of the test; only its prefix is
    # inspected to derive the 0/1 flag.
    return {
        'zmk': z_score,
        'ha': 0 if verdict.startswith('rej') else 1,
        'm': slope,
        'c': intercept,
        'p': p_value,
    }
示例#2
0
def show_examples():
    """
    Plot the MK test results for artificial data.

    Four noisy linear time series (one per trend coefficient in ``C``) are
    generated, tested with ``mkt.test`` and drawn on a 2x2 grid of subplots
    together with the fitted line. The figure is shown on screen.

    Returns
    -------
    None
    """
    # create artificial time series with trend
    n = 1000
    C = [0.01, 0.001, -0.001, -0.01]
    e = 1.00
    t = np.linspace(0., 500, n)

    # set up figure
    nrows, ncols = 2, 2
    fig, axes = pl.subplots(nrows=nrows, ncols=ncols, figsize=[16.00, 9.00])

    # loop through various values of correlation
    ALPHA = 0.01
    for idx, (trend, ax) in enumerate(zip(C, axes.flatten())):
        # `axes.flatten()` walks the grid row by row, so the position in the
        # grid follows from the index. This replaces the `ax.is_first_row()`
        # family of helpers, which newer Matplotlib releases no longer have.
        row, col = divmod(idx, ncols)

        # estimate the measurements 'x'
        x = trend * t + e * np.random.randn(n)
        x = np.round(x, 2)

        # get the slope, intercept and pvalues from the mklt module
        MK, m, c, p = mkt.test(t, x, eps=1E-3, alpha=ALPHA, Ha="upordown")

        # plot results
        ax.plot(t, x, "k.-", label="Sampled time series")
        ax.plot(t, m * t + c, "r-", label="Linear fit")
        ax.set_title(MK.upper() + "\np=%.3f, alpha = %.2f" % (p, ALPHA),
                     fontweight="bold", fontsize=10)

        # prettify
        if row == nrows - 1:
            ax.legend(loc="upper right")
            ax.set_xlabel("Time")
        if col == 0:
            ax.set_ylabel(r"Measurements $x$")
        if row == 0:
            ax.legend(loc="upper left")

    # save/show plot
    # `show` takes no figure argument; it displays all open figures.
    pl.show()
    return None
示例#3
0
    def trendTest(self,
                  time_scale,
                  least_records,
                  target_alpha,
                  plot=False):  #HKM added / Jul.30.2020
        """Test the aggregated mean discharge for a monotonic trend.

        The daily record in ``self.data`` (loaded with
        ``self.getDailyDischarge()`` when it is ``None``) is averaged per
        calendar month or per calendar year. The averaged series is passed
        to ``stats.theilslopes`` and to ``mkt.test``.

        Parameters
        ----------
        time_scale : str
            ``'M'`` for monthly means, ``'Y'`` for yearly means.
        least_records : int
            Minimum number of aggregated records (months or years) needed
            to run the analysis.
        target_alpha : float
            Significance level. It is passed to ``mkt.test`` and compared
            with the returned p-value; ``1 - target_alpha`` is passed to
            ``stats.theilslopes`` as ``alpha``.
        plot : bool, optional
            When true, draw the aggregated series (with the Theil-Sen line
            if a trend was detected).

        Returns
        -------
        tuple
            ``(trend_result, slope_result, R_TS, R_MK, reason)``.
            ``trend_result`` is 1 (increasing), -1 (decreasing) or 0 (no
            trend); ``slope_result`` is the Theil slope when a trend was
            detected and 0 otherwise; ``R_TS`` and ``R_MK`` are the raw
            results of ``stats.theilslopes`` and ``mkt.test``. When the
            series is shorter than ``least_records`` the first four items
            are ``np.nan`` and ``reason`` is ``"data shortage"``; otherwise
            ``reason`` is ``"no issues"``.

        Raises
        ------
        Exception
            If ``time_scale`` is neither ``'M'`` nor ``'Y'``, or if
            ``plot`` is true but the series was too short to analyse.
        """
        if self.data is None:
            self.data = self.getDailyDischarge()

        t_Q = self.data.rename(
            columns={'Flow ({})'.format(self.getUnit()): 'Flow'})
        reason = "no issues"
        t_Q.Date = pd.to_datetime(t_Q.Date)

        # If we have a date gap, we should modify this code lines
        # Here, I assume that USGS provides continuous data
        if time_scale == 'M':  # Monthly trend
            date_format, period_name = '%Y-%m', 'months'
        elif time_scale == 'Y':  # Yearly trend
            date_format, period_name = '%Y', 'years'
        else:
            raise Exception(
                'Invalid time scale. Please select M (monthly trend) or Y (yearly trend)'
            )

        t_Q_aggr = t_Q.groupby(t_Q.Date.dt.strftime(date_format)).Flow.agg(
            ['mean'])

        valid_flag = len(t_Q_aggr) >= least_records
        if not valid_flag:
            reason = "data shortage"
            print(
                f'    Data at this gage has records shorter than your defined {least_records} {period_name}.\n'
            )

        if valid_flag:
            x = np.arange(len(t_Q_aggr))
            y = t_Q_aggr.to_numpy().ravel()

            # Theil slopes. Returns:
            #   1) medslope     : Theil slope.
            #   2) medintercept : intercept of the Theil line, as
            #                     median(y) - medslope*median(x).
            #   3) lo_slope     : lower bound of the confidence interval
            #                     on medslope.
            #   4) up_slope     : upper bound of the confidence interval
            #                     on medslope.
            # https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.mstats.theilslopes.html
            R_TS = stats.theilslopes(y, x, alpha=1 - target_alpha)

            # Mann Kendall Trend Test. Returns:
            #   1) MK : string, result of the statistical test indicating
            #           whether or not to accept the alternative
            #           hypothesis 'Ha'.
            #   2) m  : slope of the linear fit to the data.
            #   3) c  : intercept of the linear fit to the data.
            #   4) p  : p-value of the obtained Z-score statistic.
            # https://up-rs-esp.github.io/mkt/_modules/mkt.html
            R_MK = mkt.test(x, y, 1, target_alpha, "upordown")

            significant = R_MK[3] < target_alpha
            if significant and R_MK[1] > 0:
                trend_result = 1  # increasing trend
                slope_result = R_TS[0]
            elif significant and R_MK[1] < 0:
                trend_result = -1  # decreasing trend
                slope_result = R_TS[0]
            else:
                trend_result = 0  # no trend
                slope_result = 0
        else:  # Any cases we cannot conduct the trend analysis
            # `reason` keeps the value set above; a too-short record is the
            # only way to reach this branch.
            trend_result = np.nan
            slope_result = np.nan
            R_TS = np.nan
            R_MK = np.nan

        if plot:  # monthly or yearly plot with the regression line
            if not valid_flag:
                raise Exception('Not enough data to plot')

            # The group keys are '%Y-%m' / '%Y' strings; turn them back
            # into timestamps for the x axis.
            t_aggr_date = pd.to_datetime(t_Q_aggr.index, format=date_format)

            fig, ax = plt.subplots(figsize=(15, 5))
            if trend_result == 0:
                ax.plot(t_aggr_date, y, linewidth=2)
            else:
                ax.plot(t_aggr_date,
                        y,
                        t_aggr_date,
                        R_TS[0] * x + R_TS[1],
                        'r--',
                        linewidth=2)
            ax.set_xlabel('Date', fontsize=12)
            ax.set_ylabel('Discharge {}'.format(self.getUnit()),
                          fontsize=12)
            ax.set_title('Discharge at USGS {}'.format(self.id),
                         fontsize=16)

            if trend_result == 0:
                # Centre the label on the middle date and on the vertical
                # midpoint of the plotted values.
                ax.text(t_aggr_date[round(len(t_aggr_date) / 2)],
                        (max(y) + min(y)) / 2,
                        "No Trend",
                        size=50,
                        rotation=30.,
                        ha="center",
                        va="center",
                        bbox=dict(
                            boxstyle="round",
                            ec=(1., 0.5, 0.5),
                            fc=(1., 0.8, 0.8),
                        ))

        return trend_result, slope_result, R_TS, R_MK, reason
    def trendTest(self, time_scale, target_alpha):  #HKM added / Jul.30.2020
        """Test the aggregated mean discharge for a monotonic trend.

        The daily record in ``self.data`` (loaded with
        ``self.getDailyDischarge()`` when it is ``None``) is averaged per
        calendar month (``time_scale == 'M'``, at least 120 months needed)
        or per calendar year (``time_scale == 'Y'``, at least 10 years
        needed). The averaged series is passed to ``stats.theilslopes`` and
        to ``mkt.test``.

        Parameters
        ----------
        time_scale : str
            ``'M'`` for monthly means, ``'Y'`` for yearly means. Any other
            value prints a hint and yields the "no analysis" result.
        target_alpha : float
            Significance level. It is passed to ``mkt.test`` and compared
            with the returned p-value; ``1 - target_alpha`` is passed to
            ``stats.theilslopes`` as ``alpha``.

        Returns
        -------
        tuple
            ``(trend_result, slope_result, R_TS, R_MK)``. ``trend_result``
            is 1 (increasing), -1 (decreasing) or 0 (no trend);
            ``slope_result`` is the Theil slope when a trend was detected
            and 0 otherwise; the last two items are the raw results of
            ``stats.theilslopes`` and ``mkt.test``. When the analysis
            cannot be run, all four items are ``False``. Note that
            ``False == 0`` in Python, so callers should test with
            ``is False`` to tell "not analysed" from "no trend".
        """
        if self.data is None:
            self.data = self.getDailyDischarge()

        flow_column = 'Flow ({})'.format(self.getUnit())
        discharge = self.data.rename(columns={flow_column: 'Flow'})

        # Result used for every case in which the trend analysis
        # cannot be conducted.
        not_analysed = (False, False, False, False)

        # The grouping below assumes a continuous record (no date gaps).
        if time_scale == 'M':  # monthly trend: at least 10 years of months
            date_pattern, minimum_records = '%Y-%m', 120
        elif time_scale == 'Y':  # yearly trend: at least 10 years
            date_pattern, minimum_records = '%Y', 10
        else:
            print('Please select M (monthly trend) or Y (yearly trend)')
            return not_analysed

        period_keys = discharge.Date.dt.strftime(date_pattern)
        period_means = discharge.groupby(period_keys).Flow.agg(['mean'])
        if len(period_means) < minimum_records:
            return not_analysed

        y = period_means.to_numpy().ravel()
        x = np.arange(len(period_means))

        # Theil slopes: (medslope, medintercept, lo_slope, up_slope), where
        # lo_slope / up_slope bound the confidence interval on medslope.
        # https://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.mstats.theilslopes.html
        theil_sen = stats.theilslopes(y, x, alpha=1 - target_alpha)

        # Mann-Kendall trend test: (MK, m, c, p) = result string, slope and
        # intercept of the linear fit, p-value of the Z-score statistic.
        # https://up-rs-esp.github.io/mkt/_modules/mkt.html
        mann_kendall = mkt.test(x, y, 1, target_alpha, "upordown")

        significant = mann_kendall[3] < target_alpha
        if significant and mann_kendall[1] > 0:
            direction = 1  # increasing trend
        elif significant and mann_kendall[1] < 0:
            direction = -1  # decreasing trend
        else:
            direction = 0  # no trend

        # The Theil slope is reported only when a trend was detected.
        slope = theil_sen[0] if direction != 0 else 0

        return direction, slope, theil_sen, mann_kendall