Пример #1
0
    def test_rolling_max_how_resample(self):
        """Check the ``how`` option of ``mom.rolling_max`` when ``freq='D'``.

        The input holds one observation on each of 1-4 January 1975 and three
        observations (4.0, 10.0 and 20.0) on 5 January, so the value expected
        for the last day reveals which aggregation was applied.
        """
        # One timestamp per day, plus two extra timestamps (01:00 and 02:00)
        # on the 5th so that the last day has three data points.
        stamps = [datetime(1975, 1, day) for day in range(1, 6)]
        stamps += [datetime(1975, 1, 5, hour) for hour in (1, 2)]

        series = Series(list(range(0, 5)) + [10, 20], index=stamps)
        # Float values, sorted chronologically.
        series = series.map(float).sort_index()

        daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
        mean_of_last_day = (4.0 + 10.0 + 20.0) / 3.0

        # Each case: (extra keyword arguments, value expected for 5 January).
        cases = [
            ({}, 20.0),                           # default how should be max
            ({'how': 'median'}, 10.0),            # median of 4, 10, 20
            ({'how': 'mean'}, mean_of_last_day),  # (4+10+20)/3
        ]
        for extra_kwargs, last_value in cases:
            expected = Series([0.0, 1.0, 2.0, 3.0, last_value],
                              index=daily_index)
            result = mom.rolling_max(series, window=1, freq='D',
                                     **extra_kwargs)
            assert_series_equal(expected, result)
    def test_rolling_max_how_resample(self):
        """Exercise ``mom.rolling_max`` with ``freq='D'`` and several ``how`` values.

        5 January 1975 carries three data points (4.0, 10.0 and 20.0); all
        other days carry one. Only the expected value for that last day
        differs between the cases.
        """
        indices = [datetime(1975, 1, i) for i in range(1, 6)]
        # Two more timestamps on the last day, giving it 3 datapoints.
        indices.extend([datetime(1975, 1, 5, 1), datetime(1975, 1, 5, 2)])
        raw = Series(list(range(0, 5)) + [10, 20], index=indices)
        # Floats instead of ints, in chronological order.
        series = raw.map(lambda x: float(x)).sort_index()

        def expected_with(last_day_value):
            # Daily series whose first four values are fixed.
            return Series([0.0, 1.0, 2.0, 3.0, last_day_value],
                          index=[datetime(1975, 1, i, 0)
                                 for i in range(1, 6)])

        # Default how should be max
        expected = expected_with(20.0)
        result = mom.rolling_max(series, window=1, freq='D')
        assert_series_equal(expected, result)

        # Median of the last day is 10.0
        expected = expected_with(10.0)
        result = mom.rolling_max(series, window=1, freq='D', how='median')
        assert_series_equal(expected, result)

        # Mean of the last day is (4+10+20)/3
        expected = expected_with((4.0 + 10.0 + 20.0) / 3.0)
        result = mom.rolling_max(series, window=1, freq='D', how='mean')
        assert_series_equal(expected, result)
Пример #3
0
def find_peak_ind(data,
                  width,
                  width_roll_mean=200,
                  roll_max_peaks_threshold=4.0,
                  is_ret_roll_max_peaks=False):
    """
    Calculate the indices of isolated maxima in the data array, usually containing the result
    of a correlation calculation between a timeseries and a pattern.

    Parameters
    ----------
    data : 1d ndarray
        Timeseries, usually containing the result
        of a correlation calculation between a timeseries and a pattern.
    width : int
        The window passed to ``mom.rolling_max``, i.e. the width of the interval in which
        the maximum is found. Two maxima have to be at least width apart to be registered
        as separate.
    width_roll_mean : int
        Width intended for a rolling-mean normalisation of the data. That normalisation is
        currently switched off (the data are divided by the constant 1), so this argument
        has no effect on the result. It is kept so that existing callers keep working.
    roll_max_peaks_threshold : float
        Only indices where the non-centred rolling maximum exceeds this value are examined.
        While normalisation is switched off this is compared against the rolling maximum
        of the raw data.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.

    Returns
    -------
    peak_inds : list
        List of indices of the peaks in data.
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Non-centred rolling maximum of the (currently un-normalised) data.
    """
    # The divisor used to be mom.rolling_mean(data, width_roll_mean, center=True),
    # but it was always overwritten by 1 before use. The unused rolling mean is
    # no longer computed; the division is kept so the data passed on are the
    # same as before.
    normaliser = 1
    normalised = data / normaliser

    roll_max_peaks = mom.rolling_max(normalised, width, center=False)
    # -- Calculate the centered rolling max.
    roll_max_peaks_c = mom.rolling_max(normalised, width, center=True)

    roll_peak_inds, = np.nonzero(roll_max_peaks > roll_max_peaks_threshold)

    # Two values closer than this are treated as equal.
    tol = 0.0001
    peak_inds = []
    # The first and last candidates are skipped, so c - 1 and c + 1 below are
    # always valid positions.
    for c in roll_peak_inds[1:-1]:
        # -- A peak is registered when the rolling max has just risen (left
        #    entry smaller), stays the same one step to the right, and equals
        #    the centered rolling max one step to the left.
        if (roll_max_peaks[c - 1] < roll_max_peaks[c]
                and np.abs(roll_max_peaks[c] - roll_max_peaks[c + 1]) < tol
                and np.abs(roll_max_peaks[c] - roll_max_peaks_c[c - 1]) < tol):
            peak_inds.append(c)

    if is_ret_roll_max_peaks:
        return peak_inds, roll_max_peaks
    return peak_inds
Пример #4
0
def find_peak_ind(data, width, width_roll_mean=200,
                  roll_max_peaks_threshold=4.0,
                  is_ret_roll_max_peaks=False):
    """
    Calculate the indices of isolated maxima in the data array, usually containing the result
    of a correlation calculation between a timeseries and a pattern.

    Parameters
    ----------
    data : 1d ndarray
        Timeseries, usually containing the result
        of a correlation calculation between a timeseries and a pattern.
    width : int
        The window passed to ``mom.rolling_max``, i.e. the width of the interval in which
        the maximum is found. Two maxima have to be at least width apart to be registered
        as separate.
    width_roll_mean : int
        Width intended for a rolling-mean normalisation of the data. That normalisation is
        currently switched off (the data are divided by the constant 1), so this argument
        has no effect on the result. It is kept so that existing callers keep working.
    roll_max_peaks_threshold : float
        Only indices where the non-centred rolling maximum exceeds this value are examined.
        While normalisation is switched off this is compared against the rolling maximum
        of the raw data.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.

    Returns
    -------
    peak_inds : list
        List of indices of the peaks in data.
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Non-centred rolling maximum of the (currently un-normalised) data.
    """
    # A rolling mean (mom.rolling_mean(data, width_roll_mean, center=True)) was
    # computed here and then unconditionally replaced by 1. The wasted call is
    # dropped; dividing by 1 is retained so the values handed to rolling_max
    # are unchanged.
    divisor = 1
    scaled = data / divisor

    roll_max_peaks = mom.rolling_max(scaled, width, center=False)
    # -- Calculate the centered rolling max.
    roll_max_peaks_c = mom.rolling_max(scaled, width, center=True)

    roll_peak_inds, = np.nonzero(roll_max_peaks > roll_max_peaks_threshold)

    # Absolute difference below which two values count as the same.
    eps = 0.0001
    peak_inds = []
    # Dropping the first and last candidates keeps c - 1 and c + 1 in range.
    for c in roll_peak_inds[1:-1]:
        # -- A peak is registered when the rolling max has just risen (left
        #    entry smaller), stays the same one step to the right, and equals
        #    the centered rolling max one step to the left.
        rises_here = roll_max_peaks[c - 1] < roll_max_peaks[c]
        if (rises_here
                and np.abs(roll_max_peaks[c] - roll_max_peaks[c + 1]) < eps
                and np.abs(roll_max_peaks[c] - roll_max_peaks_c[c - 1]) < eps):
            peak_inds.append(c)

    if is_ret_roll_max_peaks:
        return peak_inds, roll_max_peaks
    return peak_inds
Пример #5
0
    def test_rolling_max(self):
        """Check ``mom.rolling_max`` against ``np.max`` and its ``min_periods`` handling."""
        self._check_moment_func(mom.rolling_max, np.max)

        # With a window longer than the data and min_periods=1, this
        # increasing input is expected to come back unchanged.
        values = np.array([1, 2, 3, 4, 5])
        result = mom.rolling_max(values, window=100, min_periods=1)
        assert_almost_equal(values, result)

        # min_periods larger than the window is expected to raise.
        invalid_kwargs = {'window': 3, 'min_periods': 5}
        self.assertRaises(ValueError, mom.rolling_max, np.array([1, 2, 3]),
                          **invalid_kwargs)
Пример #6
0
    def test_rolling_max(self):
        """Test ``mom.rolling_max``: generic moment checks, a long window, and bad ``min_periods``."""
        self._check_moment_func(mom.rolling_max, np.max)

        # Window of 100 over five increasing values, min_periods=1: the
        # expected output equals the input.
        increasing = np.array([1, 2, 3, 4, 5])
        assert_almost_equal(
            increasing,
            mom.rolling_max(increasing, window=100, min_periods=1))

        # Asking for more periods (5) than the window holds (3) must raise.
        short_input = np.array([1, 2, 3])
        self.assertRaises(ValueError, mom.rolling_max, short_input,
                          window=3, min_periods=5)
Пример #7
0
    def test_rolling_functions_window_non_shrinkage(self):
        """Rolling functions on inputs shorter than ``min_periods`` (GH 7764).

        Every function is called with ``window=10, min_periods=5`` on a
        4-row Series and a 4-row DataFrame. The expected result is all-NaN
        and keeps the index (and columns) of the input. The pairwise
        variants are compared against an empty-data Panel.
        """
        # GH 7764
        kw = dict(window=10, min_periods=5)

        s = Series(range(4))
        df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])

        s_expected = Series(np.nan, index=s.index)
        df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
        df_expected_panel = Panel(items=df.index,
                                  major_axis=df.columns,
                                  minor_axis=df.columns)

        non_pairwise = [
            lambda x: mom.rolling_cov(x, x, pairwise=False, **kw),
            lambda x: mom.rolling_corr(x, x, pairwise=False, **kw),
            lambda x: mom.rolling_max(x, **kw),
            lambda x: mom.rolling_min(x, **kw),
            lambda x: mom.rolling_sum(x, **kw),
            lambda x: mom.rolling_mean(x, **kw),
            lambda x: mom.rolling_std(x, **kw),
            lambda x: mom.rolling_var(x, **kw),
            lambda x: mom.rolling_skew(x, **kw),
            lambda x: mom.rolling_kurt(x, **kw),
            lambda x: mom.rolling_quantile(x, quantile=0.5, **kw),
            lambda x: mom.rolling_median(x, **kw),
            lambda x: mom.rolling_apply(x, func=sum, **kw),
            lambda x: mom.rolling_window(x, win_type='boxcar', **kw),
        ]
        for func in non_pairwise:
            try:
                assert_series_equal(func(s), s_expected)
                assert_frame_equal(func(df), df_expected)
            except ImportError:
                # scipy needed for rolling_window
                continue

        pairwise = [
            lambda x: mom.rolling_cov(x, x, pairwise=True, **kw),
            lambda x: mom.rolling_corr(x, x, pairwise=True, **kw),
            # rolling_corr_pairwise is deprecated, so the following line
            # should be deleted when rolling_corr_pairwise is removed.
            lambda x: mom.rolling_corr_pairwise(x, x, **kw),
        ]
        for func in pairwise:
            assert_panel_equal(func(df), df_expected_panel)
Пример #8
0
    def test_rolling_max_gh6297(self):
        """Replicate result expected in GH #6297"""
        # One timestamp per day for 1-5 January 1975, plus a second one on
        # 3 January (06:00) so that this day has 2 datapoints.
        stamps = [datetime(1975, 1, day) for day in range(1, 6)]
        stamps.append(datetime(1975, 1, 3, 6, 0))

        # Float values, sorted chronologically.
        series = Series(range(1, 7), index=stamps).map(float).sort_index()

        daily_index = [datetime(1975, 1, day, 0) for day in range(1, 6)]
        # 3 January holds the values 3.0 and 6.0, hence 6.0 is expected there.
        expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], index=daily_index)

        result = mom.rolling_max(series, window=1, freq="D")
        assert_series_equal(expected, result)
    def test_rolling_max_gh6297(self):
        """Replicate result expected in GH #6297"""
        days = [datetime(1975, 1, i) for i in range(1, 6)]
        # A sixth timestamp at 06:00 on 3 January gives that day 2 datapoints.
        extra = datetime(1975, 1, 3, 6, 0)
        raw = Series(range(1, 7), index=days + [extra])

        # Convert the values to floats, then order the series chronologically.
        as_float = raw.map(lambda x: float(x))
        series = as_float.sort_index()

        expected_values = [1.0, 2.0, 6.0, 4.0, 5.0]
        expected_index = [datetime(1975, 1, i, 0) for i in range(1, 6)]
        expected = Series(expected_values, index=expected_index)

        actual = mom.rolling_max(series, window=1, freq='D')
        assert_series_equal(expected, actual)
Пример #10
0
    def test_rolling_functions_window_non_shrinkage(self):
        """Check rolling functions when the input is shorter than ``min_periods`` (GH 7764).

        With ``window=10, min_periods=5`` and only four rows of input, each
        function is expected to return all-NaN output that keeps the input's
        index (and columns). Pairwise variants are compared with a Panel
        built without data.
        """
        # GH 7764
        opts = {'window': 10, 'min_periods': 5}

        s = Series(range(4))
        s_expected = Series(np.nan, index=s.index)

        df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
        df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
        df_expected_panel = Panel(items=df.index, major_axis=df.columns,
                                  minor_axis=df.columns)

        functions = [
            lambda x: mom.rolling_cov(x, x, pairwise=False, **opts),
            lambda x: mom.rolling_corr(x, x, pairwise=False, **opts),
            lambda x: mom.rolling_max(x, **opts),
            lambda x: mom.rolling_min(x, **opts),
            lambda x: mom.rolling_sum(x, **opts),
            lambda x: mom.rolling_mean(x, **opts),
            lambda x: mom.rolling_std(x, **opts),
            lambda x: mom.rolling_var(x, **opts),
            lambda x: mom.rolling_skew(x, **opts),
            lambda x: mom.rolling_kurt(x, **opts),
            lambda x: mom.rolling_quantile(x, quantile=0.5, **opts),
            lambda x: mom.rolling_median(x, **opts),
            lambda x: mom.rolling_apply(x, func=sum, **opts),
            lambda x: mom.rolling_window(x, win_type='boxcar', **opts),
        ]
        for roll in functions:
            try:
                s_result = roll(s)
                assert_series_equal(s_result, s_expected)

                df_result = roll(df)
                assert_frame_equal(df_result, df_expected)
            except ImportError:
                # scipy needed for rolling_window
                continue

        pairwise_functions = [
            lambda x: mom.rolling_cov(x, x, pairwise=True, **opts),
            lambda x: mom.rolling_corr(x, x, pairwise=True, **opts),
            # rolling_corr_pairwise is deprecated, so the following line
            # should be deleted when rolling_corr_pairwise is removed.
            lambda x: mom.rolling_corr_pairwise(x, x, **opts),
        ]
        for roll in pairwise_functions:
            df_result_panel = roll(df)
            assert_panel_equal(df_result_panel, df_expected_panel)
Пример #11
0
def aggregated_line_seeds(results, title):
    """Plot the rolling mean of the errors over time with a min/max band and save it.

    ``results`` is sorted by element 1 of each row. That column, divided by
    60 twice, is plotted on the x axis (labelled in hours), and column 2
    supplies the error values. When the module-level ``is_regression`` flag
    is true the errors are plotted as ``log10``. The figure is written to
    ``<AUTOWEKA_PATH>/plots<suffix>/trajectories-<title>.aggregated.png``,
    where ``suffix`` is a module-level name and ``AUTOWEKA_PATH`` an
    environment variable.
    """
    plt.close()

    points = np.array(sorted(results, key=itemgetter(1)))
    hours = points[:, 1] / 60 / 60
    errors = points[:, 2]
    if is_regression:
        errors = np.log10(errors)

    # Rolling statistics over a window of 5 points.
    window = 5
    y_mean = stats.rolling_mean(errors, window)
    y_upper = stats.rolling_max(errors, window)
    y_lower = stats.rolling_min(errors, window)

    plt.plot(hours, y_mean, color="red", label="Rolling mean")

    # Shade from the mean up to the rolling max, then from the rolling min up
    # to the mean.
    band_style = dict(facecolor='gray', interpolate=True, alpha=0.5)
    for lower_edge, upper_edge in ((y_mean, y_upper), (y_lower, y_mean)):
        plt.fill_between(hours, lower_edge, upper_edge, **band_style)

    plt.xlabel("Time (h)")
    if is_regression:
        plt.ylabel("log(RMSE)")
    else:
        plt.ylabel("% class. error")
        plt.ylim(0, 100)

    plt.margins(0.05, 0.05)
    plt.title(title)

    target = "%s/plots%s/trajectories-%s.aggregated.png" % (
        os.environ['AUTOWEKA_PATH'], suffix, title)
    plt.savefig(target, bbox_inches='tight')
Пример #12
0
def hhv(s, n):
    """Return the rolling maximum of ``s`` over a window of ``n``.

    Thin wrapper around ``moments.rolling_max(s, n)``.
    """
    highest = moments.rolling_max(s, n)
    return highest
Пример #13
0
# Plot the rolling mean of ``sorted_errors`` against ``sorted_time`` on ``ax1``,
# shade the band between the rolling min and max, and save the figure.
# NOTE(review): ax1, fig, sorted_errors, sorted_points, sorted_time, suffix and
# title are defined outside this fragment.
#
# Disabled earlier variant (per-seed scatter plot on ``ax``), kept for reference:
# ax.set_xlabel('Time (h)')
# ax.set_ylabel('RMSE')
# ax.set_yscale('log')
# ax.set_xlim(0,30)
# colors = sns.color_palette("husl", 25)
# for i in range(0,25):
# ax.scatter(time_by_seed[i], error_by_seed[i], c=cm.hsv(i/25.,1), s=[30]*len(time_by_seed[i]))
# ax.scatter(time_by_seed[i], error_by_seed[i], c=[colors[i]]*len(time_by_seed[i]), s=[30]*len(time_by_seed[i]))

ax1.set_xlabel('Time (h)')
ax1.set_ylabel('RMSE')
ax1.set_xlim(-1, 30)
# Rolling statistics of the errors over a window of 5 points.
y_mean = stats.rolling_mean(sorted_errors, 5)
# Within this fragment y_std is only referenced by commented-out code.
y_std = stats.rolling_std(sorted_errors, 5)
# The band is bounded by the rolling max/min; the disabled alternative used
# mean +/- 2 standard deviations.
# y_upper = y_mean + 2*y_std
y_upper = stats.rolling_max(sorted_errors, 5)
# y_lower = y_mean - 2*y_std
y_lower = stats.rolling_min(sorted_errors, 5)
# Within this fragment sorted_data is only referenced by the commented-out
# jointplot call below.
sorted_data = DataFrame(data=sorted_points, columns=['time', 'binned_time', 'error', 'seed'])
# sns.jointplot("binned_time", "error", sorted_data)
# ax1.scatter(sorted_binned_time, sorted_errors)
ax1.plot(sorted_time, y_mean, color="red", label="Rolling mean")
# ax1.errorbar(sorted_binned_time, sorted_errors, marker='o', ms=8, yerr=3*y_std, ls='dotted', label="Rolling mean")
ax1.legend()
# Shade mean..max, then min..mean.
ax1.fill_between(sorted_time, y_mean, y_upper, facecolor='gray', interpolate=True, alpha=0.5)
ax1.fill_between(sorted_time, y_lower, y_mean, facecolor='gray', interpolate=True, alpha=0.5)
# NOTE(review): this creates ./plots in the working directory, but the active
# savefig call below writes under $AUTOWEKA_PATH/plots<suffix>/ — confirm that
# directory is created elsewhere.
if not os.path.isdir("plots"):
    os.mkdir("plots")
#fig.savefig("plots/points.png", bbox_inches='tight')
fig.savefig("%s/plots%s/points-%s.png" % (os.environ['AUTOWEKA_PATH'], suffix, title), bbox_inches='tight')
# plt.show()
Пример #14
0
# Plot the rolling mean of ``sorted_errors`` against ``sorted_time`` on ``ax1``
# and shade the region between the rolling mean and the rolling max.
# NOTE(review): ax1, sorted_errors, sorted_points and sorted_time are defined
# outside this fragment.
#
# Disabled earlier variant (per-seed scatter plot on ``ax``), kept for reference:
# ax.set_xlabel('Time (h)')
# ax.set_ylabel('RMSE')
# ax.set_yscale('log')
# ax.set_xlim(0,30)
# colors = sns.color_palette("husl", 25)
# for i in range(0,25):
# ax.scatter(time_by_seed[i], error_by_seed[i], c=cm.hsv(i/25.,1), s=[30]*len(time_by_seed[i]))
# ax.scatter(time_by_seed[i], error_by_seed[i], c=[colors[i]]*len(time_by_seed[i]), s=[30]*len(time_by_seed[i]))

ax1.set_xlabel('Time (h)')
ax1.set_ylabel('RMSE')
ax1.set_xlim(-1, 30)
# Rolling statistics of the errors over a window of 5 points.
y_mean = stats.rolling_mean(sorted_errors, 5)
# Within this fragment y_std is only referenced by commented-out code.
y_std = stats.rolling_std(sorted_errors, 5)
# The band is bounded by the rolling max/min; the disabled alternative used
# mean +/- 2 standard deviations.
# y_upper = y_mean + 2*y_std
y_upper = stats.rolling_max(sorted_errors, 5)
# y_lower = y_mean - 2*y_std
y_lower = stats.rolling_min(sorted_errors, 5)
# Within this fragment sorted_data is only referenced by the commented-out
# jointplot call below.
sorted_data = DataFrame(data=sorted_points,
                        columns=['time', 'binned_time', 'error', 'seed'])
# sns.jointplot("binned_time", "error", sorted_data)
# ax1.scatter(sorted_binned_time, sorted_errors)
ax1.plot(sorted_time, y_mean, color="red", label="Rolling mean")
# ax1.errorbar(sorted_binned_time, sorted_errors, marker='o', ms=8, yerr=3*y_std, ls='dotted', label="Rolling mean")
ax1.legend()
# Shade the region between the rolling mean and the rolling max.
ax1.fill_between(sorted_time,
                 y_mean,
                 y_upper,
                 facecolor='gray',
                 interpolate=True,
                 alpha=0.5)
Пример #15
0
def hhv(s, n):
    """Compute ``moments.rolling_max`` of ``s`` with a window of ``n``.

    The arguments are passed straight through and the result is returned
    unchanged.
    """
    rolling_high = moments.rolling_max(s, n)
    return rolling_high
Пример #16
0
# Download one document per day from 1 July 2010 up to (but not including)
# today, collect the records into a DataFrame and plot their rolling maximum.
current_date = start_date = datetime.date(2010, 7, 1)
day = datetime.timedelta(days=1)
end_date = datetime.date.today()

pv_data = []

for _ in range((end_date - start_date).days):
    try:
        html = html_for_day(current_date)
    except urllib2.URLError:
        # Best effort: a day whose download fails is skipped. The handler
        # avoids the Python-2-only "except X, e" form and does not bind the
        # exception, which was never used.
        continue
    finally:
        # Runs on success and on failure alike, so the loop always advances
        # to the next day.
        current_date = current_date + day

    tree = ElementTree()
    tree.parse(handle_illformed_xml(html))
    items = tree.iterfind('item')
    pv_data.extend(map(item_to_tuple, items))


pv = DataFrame.from_records(pv_data, index='timestamp', columns=["timestamp", "expost"])
fig = pyplot.figure()
# Window of 4*24*30 samples — presumably four samples per hour over 30 days,
# matching the "rolling month" axis label; confirm against the data source.
rolling_max(pv, 4*24*30).plot(rot=15)
pyplot.title("Solar Photovoltaic production in Amprion's network Germany")
pyplot.ylabel("Rolling month maximum power produced [MW]")
pyplot.xlabel("Date")
pyplot.grid()
# NOTE(review): the file name says "ampiron" while the title says "Amprion";
# left unchanged because other tooling may rely on the existing name.
pyplot.savefig('ampiron.png')