def saa_pax(dataset, title, n_paa_segments=10, n_sax_symbols=8,
            n_sax_symbols_avg=8, n_sax_symbols_slope=8):
    """Plot a time series next to its PAA, SAX and 1d-SAX approximations.

    The dataset is rescaled with ``TimeSeriesScalerMeanVariance(mu=0., std=1.)``,
    each approximation is fitted and inverse-transformed, and the first series
    of every result is drawn on a 2x2 grid of subplots. The figure is then
    displayed with ``plt.show()``.

    :param dataset: time series of a stock (handed unchanged to the scaler's
        ``fit_transform``)
    :param title: text appended to the "Raw time series" and "PAA" subplot
        titles
    :param n_paa_segments: ``n_segments`` passed to all three approximations
    :param n_sax_symbols: ``alphabet_size_avg`` of the plain SAX transform
    :param n_sax_symbols_avg: ``alphabet_size_avg`` of the 1d-SAX transform
    :param n_sax_symbols_slope: ``alphabet_size_slope`` of the 1d-SAX transform
    :return: None
    """
    # Rescale time series
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # Only the first series of the dataset is plotted.
    raw_series = dataset[0].ravel()

    plt.figure()

    # First, raw time series
    plt.subplot(2, 2, 1)
    plt.plot(raw_series, "b-")
    plt.title("Raw time series " + title)

    # Second, PAA (raw series faded in the background for comparison)
    plt.subplot(2, 2, 2)
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    # Then SAX
    plt.subplot(2, 2, 3)
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    # Finally, 1d-SAX
    plt.subplot(2, 2, 4)
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (n_sax_symbols_avg * n_sax_symbols_slope,
                                              n_sax_symbols_avg,
                                              n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
def sax(self, data):
    """Return the 1d-SAX reconstruction of ``data`` as a 2-D array.

    ``data`` is fitted and encoded with a 1d-SAX model using 10 segments and
    alphabet sizes of 8 for both averages and slopes, decoded again with
    ``inverse_transform``, and the result is reshaped to its first two
    dimensions.
    """
    model = OneD_SymbolicAggregateApproximation(
        n_segments=10,
        alphabet_size_avg=8,
        alphabet_size_slope=8)
    encoded = model.fit_transform(data)
    reconstructed = model.inverse_transform(encoded)
    # Keep only the first two axes; the reshape succeeds only when any
    # remaining axes hold a single element.
    n_rows, n_cols = reconstructed.shape[0], reconstructed.shape[1]
    return np.reshape(reconstructed, (n_rows, n_cols))
def genList1D_SAX(instances_nor, windowSize, timestamp, n_sax_symbols_avg=5, n_sax_symbols_slope=5):
    """Build a 1d-SAX sketch of ``instances_nor`` tagged with ``timestamp``.

    A 1d-SAX model with ``windowSize`` segments is fitted on ``instances_nor``;
    the encoded data is decoded again and the first reconstructed series is
    flattened into a list.

    :param instances_nor: data handed to the model's ``fit_transform``
    :param windowSize: value used as the model's ``n_segments``
    :param timestamp: value stored unchanged under the ``"timestamp"`` key
    :param n_sax_symbols_avg: ``alphabet_size_avg`` of the model
    :param n_sax_symbols_slope: ``alphabet_size_slope`` of the model
    :return: dict with keys ``"sketchInstances"`` and ``"timestamp"``
    """
    model = OneD_SymbolicAggregateApproximation(
        alphabet_size_slope=n_sax_symbols_slope,
        alphabet_size_avg=n_sax_symbols_avg,
        n_segments=windowSize)
    reconstructed = model.inverse_transform(model.fit_transform(instances_nor))
    first_series = reconstructed[0].ravel()
    sketch = {"sketchInstances": list(first_series)}
    sketch["timestamp"] = timestamp
    return sketch
def get_sax_transformation(df, features_to_compute='probability', segments=10, symbols=8):
    """Add a ``sax`` column holding a per-station 1d-SAX reconstruction.

    The frame is re-sorted by station / ts and every station's series is
    approximated on its own with 1d-SAX (Symbolic Aggregate approXimation).
    Only the leading ``n_paa_segments * segments`` values of a station are
    transformed; the remaining values, which do not fill a whole segment,
    are copied unchanged. A station too short to yield a single segment is
    copied unchanged entirely.

    df : DataFrame - must provide ``station`` and ``ts`` columns after
        ``reset_index()`` plus the ``features_to_compute`` column
    features_to_compute : string - column's name of the feature to aggregate
    segments : int - number of points aggregated per segment
    symbols : int - scaled by ``len(series) / 100`` to give the alphabet size
        used for both the averages and the slopes

    Returns the DataFrame sorted by ts / station, indexed by ``ts``, with the
    extra float column ``sax``.
    """
    sax_values = []

    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])

    for station in df.station.unique():
        series = df.loc[df.station == station, features_to_compute].values
        n_rows = len(series)

        # Floor division: the former ``round(x - 0.5)`` rounds halves to even
        # on Python 3, so exact odd products came out one too low.
        # NOTE(review): `segments` acts as a percentage here but as the
        # points-per-segment when sizing the prefix below; the two readings
        # only cover the whole series for segments=10 - confirm intent.
        n_paa_segments = int(n_rows * segments // 100)
        n_sax_symbols = int(n_rows * symbols // 100)

        # Number of leading values that go through the transform.
        prefix_len = max(0, min(n_paa_segments * segments, n_rows))

        if prefix_len > 0:
            one_d_sax = OneD_SymbolicAggregateApproximation(
                n_segments=n_paa_segments,
                alphabet_size_avg=n_sax_symbols,
                alphabet_size_slope=n_sax_symbols)
            approximation = one_d_sax.inverse_transform(
                one_d_sax.fit_transform(series[:prefix_len]))
            sax_values.extend(approximation.ravel())

        # Always append this station's untransformed tail (possibly empty).
        # The previous check compared the cumulative result length with the
        # current station's length, which is only meaningful for the first
        # station.
        sax_values.extend(series[prefix_len:])

    df['sax'] = sax_values
    df['sax'] = df['sax'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset)) # SAX transform n_sax_symbols = 8 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset)) # 1d-SAX transform n_sax_symbols_avg = 8 n_sax_symbols_slope = 8 one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) one_d_sax_dataset_inv = one_d_sax.inverse_transform( one_d_sax.fit_transform(dataset)) graph_idx = graph_idx + 1 plt.subplot(len(pos_relatedStock), 4, graph_idx) # First, raw time series plt.plot(dataset[0].ravel(), "b-") plt.title("Raw time series: " + stockCode) graph_idx = graph_idx + 1 plt.subplot(len(pos_relatedStock), 4, graph_idx) # Second, PAA plt.plot(dataset[0].ravel(), "b-", alpha=0.4) plt.plot(paa_dataset_inv[0].ravel(), "b-") plt.title("PAA: " + stockCode) graph_idx = graph_idx + 1 plt.subplot(len(pos_relatedStock), 4, graph_idx) # Then SAX plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
# SAX transform n_sax_symbols = 256 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset)) print("a") # 1d-SAX transform n_sax_symbols_avg = 8 n_sax_symbols_slope = 8 one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) transformed_data = one_d_sax.fit_transform(dataset) one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data) plt.figure() plt.subplot(2, 2, 1) # First, raw time series plt.plot(dataset[0].ravel(), "b-") plt.title("Raw time series") plt.subplot(2, 2, 2) # Second, PAA plt.plot(dataset[0].ravel(), "b-", alpha=0.4) plt.plot(paa_dataset_inv[0].ravel(), "b-") plt.title("PAA") plt.subplot(2, 2, 3) # Then SAX plt.plot(dataset[0].ravel(), "b-", alpha=0.4) plt.plot(sax_dataset_inv[0].ravel(), "b-") plt.title("SAX, %d symbols" % n_sax_symbols)
import time

from numpy import fft
from sklearn.cluster import MiniBatchKMeans, KMeans, DBSCAN, SpectralClustering, Birch
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score
# Idea 4: use techniques such as PAA,
# then use libraries such as sklearn
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation

# 1d-SAX: encode the series and decode them again to get the approximation.
# `stdData` is not defined in this section - presumably the standardised
# time-series dataset prepared earlier; verify against the preceding code.
n_paa_segments = 40
n_sax_symbols_avg = 30
n_sax_symbols_slope = 30
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.inverse_transform(
    one_d_sax.fit_transform(stdData))
# inverse_transform yields a 3-D array (n_ts, sz, d), while the scikit-learn
# estimator and metric below require 2-D (n_samples, n_features) input, so
# each series is flattened into a single feature row.
transformed_data = transformed_data.reshape(transformed_data.shape[0], -1)

n_cluster = 100

# KMeans result
# Hyperparameter: the value of k
s = time.time()
km = KMeans(n_clusters=n_cluster, random_state=0)
y_pre = km.fit_predict(transformed_data)
e = time.time()
print(e - s, "s")
print(davies_bouldin_score(transformed_data, y_pre))
records = len(df_red[[i]]) print("stockname" + str(i)) scaleddata = scaler.fit_transform(df_red[[i]]) #print(scaleddata) paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments) paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata)) # SAX transform sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata)) # 1d-SAX transform one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) one_d_sax_dataset_inv = one_d_sax.inverse_transform( one_d_sax.fit_transform(scaleddata)) plt.figure() # First, raw time series plt.subplot(2, 2, 1) plt.plot(scaleddata[0].ravel(), "b-") plt.title("Raw time series") # Second, PAA plt.subplot(2, 2, 2) plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4) plt.plot(paa_dataset_inv[0].ravel(), "b-") plt.title("PAA") #SAX plot plt.subplot(2, 2, 3) # Then SAX plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4) plt.plot(sax_dataset_inv[0].ravel(), "b-") plt.title("SAX, %d symbols" % n_sax_symbols)