def test_paa():
    """Check 'paa' on ``np.arange(30)`` for several window sizes."""
    # Input series 0, 1, ..., 29 and its length
    n_timestamps = 30
    series = np.arange(n_timestamps)

    # (window_size, expected output) pairs; every case uses overlapping=0.
    # For window width w the expected values are k * w + (w - 1) / 2.
    expected_by_window = (
        (2, np.arange(0.5, 30, 2)),
        (3, np.arange(1, 30, 3)),
        (5, np.arange(2, 30, 5)),
    )

    for width, arr_desired in expected_by_window:
        arr_actual = paa(series, n_timestamps, window_size=width,
                         overlapping=0.)
        np.testing.assert_allclose(arr_actual, arr_desired,
                                   atol=1e-5, rtol=0.)
def plot_paa(ts, window_size=None, output_size=None, overlapping=True,
             output_file=None, **kwargs):
    """Draw a time series together with its PAA approximation.

    The raw series is plotted first, then one horizontal red segment per
    PAA window, and finally dashed vertical lines marking the window
    boundaries. The figure is optionally saved and then shown.

    Parameters
    ----------
    ts : np.array, shape = [n_features]
        time series to plot

    window_size : int or None (default = None)
        size of the sliding window

    output_size : int or None (default = None)
        size of the returned time series

    overlapping : bool (default = True)
        when output_size is specified, the window_size is fixed
        if overlapping is True and may vary if overlapping is False.
        Ignored if window_size is specified.

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot

    Raises
    ------
    ValueError
        if ``ts`` is not a 1-dimensional array, if not exactly one of
        ``window_size`` / ``output_size`` is given, or if the given size
        is outside ``[1, ts.size]``.
    TypeError
        if ``overlapping`` is neither a float nor an int (bool included),
        or if the given size is not an int.
    """
    # Input must be a single 1-dimensional numpy array
    if not isinstance(ts, np.ndarray) or ts.ndim != 1:
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    n_timestamps = ts.size

    # Exactly one of the two size parameters has to be provided
    if (window_size is None) == (output_size is None):
        raise ValueError("'window_size' xor 'output_size' must be specified.")

    if not isinstance(overlapping, (float, int)):
        raise TypeError("'overlapping' must be a boolean.")

    if output_size is None:
        # The window size was given explicitly: only validate it
        if not isinstance(window_size, int):
            raise TypeError("'window_size' must be an integer.")
        if window_size < 1:
            raise ValueError("'window_size' must be greater or equal than 1.")
        if window_size > n_timestamps:
            raise ValueError(
                "'window_size' must be lower or equal than the size of each time series."
            )
    else:
        # The output size was given: validate it, then derive the window
        if not isinstance(output_size, int):
            raise TypeError("'output_size' must be an integer.")
        if output_size < 1:
            raise ValueError("'output_size' must be greater or equal than 1.")
        if output_size > n_timestamps:
            raise ValueError(
                "'output_size' must be lower or equal than the size of each time series."
            )
        # Ceiling division: round up when the series does not split evenly
        quotient, remainder = divmod(n_timestamps, output_size)
        window_size = quotient + (1 if remainder else 0)

    # With plot=True, paa hands back the indices of each window and its mean
    segments, segment_means = paa(ts, ts.size, window_size, overlapping,
                                  output_size, plot=True)
    n_segments = len(segments)

    # Original series, then one flat red line per window at its mean value
    plt.plot(ts, color='#1f77b4', **kwargs)
    for k, segment in enumerate(segments):
        plt.plot(segment, np.repeat(segment_means[k], segment.size), 'r-')

    # Boundaries: start of the first window, midpoints between consecutive
    # windows, end of the last window
    boundaries = [segments[0][0]]
    boundaries.extend(
        (segments[k][-1] + segments[k + 1][0]) / 2
        for k in range(n_segments - 1)
    )
    boundaries.append(segments[n_segments - 1][-1])
    for x_cut in boundaries:
        plt.axvline(x=x_cut, ls='--', linewidth=1, color='k')

    if output_file is not None:
        plt.savefig(output_file)

    # Show plot
    plt.show()
def plot_paa_sax(ts, window_size=None, output_size=None, overlapping=True,
                 n_bins=8, quantiles='gaussian', output_file=None, **kwargs):
    """Plot the original time series, the time series after PAA
    transformation and the time series after PAA and SAX transformations.

    The raw series and the PAA segments are drawn first (blue curve, red
    horizontal segments, black dashed window boundaries). The bin edges
    used by SAX are then drawn as green dashed horizontal lines and the
    SAX symbol of each window is written above its PAA segment.

    Parameters
    ----------
    ts : np.array, shape = [n_features]
        time series to plot

    window_size : int or None (default = None)
        size of the sliding window

    output_size : int or None (default = None)
        size of the returned time series

    overlapping : bool (default = True)
        when output_size is specified, the window_size is fixed
        if overlapping is True and may vary if overlapping is False.
        Ignored if window_size is specified.

    n_bins : int (default = 8)
        number of bins (also known as the size of the alphabet).
        Must lie in ``[2, 52]``.

    quantiles : str (default = 'gaussian')
        the way to compute quantiles. Possible values:

            - 'gaussian' : quantiles from a gaussian distribution N(0,1)
            - 'empirical' : empirical quantiles

    output_file : str or None (default = None)
        if str, save the figure.

    kwargs : keyword arguments
        kwargs for matplotlib.pyplot.plot

    Raises
    ------
    ValueError
        if ``ts`` is not a 1-dimensional array, if not exactly one of
        ``window_size`` / ``output_size`` is given, if the given size is
        outside ``[1, ts.size]``, if ``n_bins`` is outside ``[2, 52]`` or
        if ``quantiles`` is not one of the two supported strings.
    TypeError
        if ``overlapping`` is neither a float nor an int (bool included),
        or if the given size or ``n_bins`` is not an int.
    """
    # Check input data
    if not (isinstance(ts, np.ndarray) and ts.ndim == 1):
        raise ValueError("'ts' must be a 1-dimensional np.ndarray.")

    # Size of ts
    ts_size = ts.size

    # Check parameters for PAA: exactly one of the two sizes must be given
    if (window_size is None) == (output_size is None):
        raise ValueError("'window_size' xor 'output_size' must be specified.")

    # NOTE: the check accepts any float or int (bool is an int subclass),
    # which is wider than the message suggests; kept for compatibility.
    if not isinstance(overlapping, (float, int)):
        raise TypeError("'overlapping' must be a boolean.")

    if output_size is None:
        if not isinstance(window_size, int):
            raise TypeError("'window_size' must be an integer.")
        if window_size < 1:
            raise ValueError("'window_size' must be greater or equal than 1.")
        if window_size > ts_size:
            raise ValueError(
                "'window_size' must be lower or equal than the size of 'ts'.")
    else:
        if not isinstance(output_size, int):
            raise TypeError("'output_size' must be an integer.")
        if output_size < 1:
            raise ValueError("'output_size' must be greater or equal than 1.")
        if output_size > ts_size:
            raise ValueError(
                "'output_size' must be lower or equal than the size of 'ts'.")
        # Compute window_size from output_size (ceiling division)
        window_size = ts_size // output_size
        window_size += 0 if ts_size % output_size == 0 else 1

    # Check parameters for SAX
    if not isinstance(n_bins, int):
        raise TypeError("'n_bins' must be an integer")
    if n_bins < 2:
        raise ValueError("'n_bins' must be greater or equal than 2")
    if n_bins > 52:
        # One symbol per bin; the alphabet below holds 52 letters
        raise ValueError("'n_bins' must be lower or equal than 52")
    if quantiles not in ['gaussian', 'empirical']:
        raise ValueError(
            "'quantiles' must be either 'gaussian' or 'empirical'")

    # With plot=True, paa hands back the indices of each window and its mean
    indices, ts_paa = paa(ts, ts.size, window_size, overlapping,
                          output_size, plot=True)
    indices_len = len(indices)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Original series, then one flat red line per window at its mean value
    plt.plot(ts, color='#1f77b4', **kwargs)
    for i in range(indices_len):
        plt.plot(indices[i], np.repeat(ts_paa[i], indices[i].size), 'r-')

    # Window boundaries: first index, midpoints between consecutive
    # windows, last index
    plt.axvline(x=indices[0][0], ls='--', linewidth=1, color='k')
    for i in range(indices_len - 1):
        plt.axvline(x=(indices[i][-1] + indices[i + 1][0]) / 2,
                    ls='--', linewidth=1, color='k')
    plt.axvline(x=indices[indices_len - 1][-1], ls='--', linewidth=1,
                color='k')

    # Alphabet
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    # Compute gaussian quantiles if quantiles == 'gaussian'
    if quantiles == 'gaussian':
        quantiles = scipy.stats.norm.ppf(
            np.linspace(0, 1, num=n_bins + 1)[1:])
        ts_sax = sax(ts_paa, n_bins, quantiles, alphabet, plot=True)
    else:
        # In the empirical case the quantiles come back from sax
        quantiles, ts_sax = sax(ts_paa, n_bins, quantiles, alphabet,
                                plot=True)

    # Only the first n_bins - 1 quantiles are drawn as bin edges
    for i in range(n_bins - 1):
        plt.axhline(y=quantiles[i], ls='--', lw=1, color='g')

    # The labels are placed in axes coordinates (transform=ax.transAxes),
    # so data positions are rescaled with the current axis limits.
    x_lim = ax.get_xlim()
    y_lim = ax.get_ylim()
    x_span = x_lim[1] - x_lim[0]
    y_span = y_lim[1] - y_lim[0]
    for i in range(indices_len):
        # Scalar percentile (50, not [50]) so that a scalar, not a
        # 1-element array, is passed to ax.text as the x coordinate.
        x_pos = (np.percentile(indices[i], 50) - x_lim[0]) / x_span
        y_pos = (ts_paa[i] - y_lim[0]) / y_span
        ax.text(x_pos, y_pos, ts_sax[i],
                horizontalalignment='center', verticalalignment='bottom',
                transform=ax.transAxes, color='m', fontsize=25)

    if output_file is not None:
        plt.savefig(output_file)

    # Show plot
    plt.show()