def saa_pax(dataset, title, n_paa_segments=10, n_sax_symbols=8,
            n_sax_symbols_avg=8, n_sax_symbols_slope=8):
    """
    Show the graph of PAA, SAX and 1d-SAX of time series data.

    The data is first rescaled with
    ``TimeSeriesScalerMeanVariance(mu=0., std=1.)``. Each transform is then
    fitted and immediately inverted so the approximation can be drawn on top
    of the rescaled series. Only the first series (``dataset[0]``) is plotted.

    :param dataset: time series of a stock
    :param title: text appended to the "Raw time series" and "PAA" subplot
        titles
    :param n_paa_segments: number of segments used by all three transforms
    :param n_sax_symbols: alphabet size of the SAX transform
    :param n_sax_symbols_avg: 1d-SAX alphabet size for segment averages
    :param n_sax_symbols_slope: 1d-SAX alphabet size for segment slopes
    :return: None; the figure is displayed through ``plt.show()``
    """
    # Rescale time series
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    sax = SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # The first series is drawn in every panel; compute it once.
    raw_series = dataset[0].ravel()

    plt.figure()

    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(raw_series, "b-")
    plt.title("Raw time series " + title)

    plt.subplot(2, 2, 2)  # Second, PAA
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA " + title)

    plt.subplot(2, 2, 3)  # Then SAX
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(sax_dataset_inv[0].ravel(), "b-")
    plt.title("SAX, %d symbols" % n_sax_symbols)

    plt.subplot(2, 2, 4)  # Finally, 1d-SAX
    plt.plot(raw_series, "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (
        n_sax_symbols_avg * n_sax_symbols_slope,
        n_sax_symbols_avg,
        n_sax_symbols_slope))

    plt.tight_layout()
    plt.show()
def sax(self, data):
    """Return the 1d-SAX reconstruction of ``data`` as a 2-D array.

    ``data`` is encoded with a 1d-SAX estimator (10 segments, alphabets of
    8 symbols for both average and slope) and decoded straight away. The
    decoded array is then reshaped to its first two dimensions.

    :param data: time series dataset handed to ``fit_transform``
    :return: array of shape ``(decoded.shape[0], decoded.shape[1])``
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=10,
        alphabet_size_avg=8,
        alphabet_size_slope=8,
    )
    symbols = encoder.fit_transform(data)
    decoded = encoder.inverse_transform(symbols)
    # Drop everything past the second axis; the reshape only succeeds when
    # the element count already matches the first two dimensions.
    return np.reshape(decoded, decoded.shape[:2])
def genList1D_SAX(instances_nor, windowSize, timestamp, n_sax_symbols_avg=5, n_sax_symbols_slope=5):
    """Build a 1d-SAX sketch of the first series in ``instances_nor``.

    :param instances_nor: time series dataset handed to ``fit_transform``
    :param windowSize: number of 1d-SAX segments
    :param timestamp: value passed through unchanged into the result
    :param n_sax_symbols_avg: alphabet size for segment averages
    :param n_sax_symbols_slope: alphabet size for segment slopes
    :return: dict with ``"sketchInstances"`` (flattened reconstruction of the
        first series, as a list) and ``"timestamp"``
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=windowSize,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope,
    )
    symbols = encoder.fit_transform(instances_nor)
    reconstructed = encoder.inverse_transform(symbols)

    # Only the first series of the dataset is returned.
    first_series = reconstructed[0].ravel()
    return {"sketchInstances": list(first_series), "timestamp": timestamp}
def get_sax_transformation(df, features_to_compute='probability', segments=10, symbols=8):
    """
    Add a 1d-SAX (Symbolic Aggregate approXimation) column to ``df``.

    The frame is re-sorted by station / ts and the time series of each
    station is approximated separately. If a station's series cannot be
    divided evenly, the trailing values that the transform does not cover
    are copied unchanged from ``df``.

    df : DataFrame - must expose ``station`` and ``ts`` columns after
        ``reset_index()``
    features_to_compute : string - column's name of the feature to aggregate
    segments : int - number of points aggregated per segment; the number of
        segments is ``len(station data) * segments // 100``
    symbols : int - drives both alphabet sizes (average and slope), each
        being ``len(station data) * symbols // 100``

    Returns the frame sorted by ``['ts', 'station']``, indexed by ``ts``,
    with a new float column ``sax``.
    """
    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])

    sax_list_result = []
    for station in df.station.unique():
        values = df[df.station == station][features_to_compute].values
        n_points = len(values)

        # Floor division replaces the original ``round(x - 0.5)``. Python 3
        # rounds halves to even, so that form returned x - 1 whenever x was
        # an odd integer (e.g. 90 rows -> 8 segments instead of 9).
        n_paa_segments = int(n_points * segments // 100)
        n_sax_symbols = int(n_points * symbols // 100)

        # Number of leading points the transform is applied to.
        # NOTE(review): very short stations give n_paa_segments == 0;
        # confirm how the estimator should treat them.
        covered = n_paa_segments * segments

        one_d_sax = OneD_SymbolicAggregateApproximation(
            n_segments=n_paa_segments,
            alphabet_size_avg=n_sax_symbols,
            alphabet_size_slope=n_sax_symbols)
        sax_list_result.extend(
            one_d_sax.inverse_transform(
                one_d_sax.fit_transform(values[:covered])).ravel())

        # Always append the uncovered tail (empty when nothing is left).
        # The previous guard compared the list accumulated over ALL stations
        # with the current station's length, so it could skip a real tail.
        sax_list_result.extend(values[covered:])

    df['sax'] = sax_list_result
    df['sax'] = df['sax'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
# SAX transform n_sax_symbols = 8 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset)) # 1d-SAX transform n_sax_symbols_avg = 8 n_sax_symbols_slope = 8 one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) one_d_sax_dataset_inv = one_d_sax.inverse_transform( one_d_sax.fit_transform(dataset)) graph_idx = graph_idx + 1 plt.subplot(len(pos_relatedStock), 4, graph_idx) # First, raw time series plt.plot(dataset[0].ravel(), "b-") plt.title("Raw time series: " + stockCode) graph_idx = graph_idx + 1 plt.subplot(len(pos_relatedStock), 4, graph_idx) # Second, PAA plt.plot(dataset[0].ravel(), "b-", alpha=0.4) plt.plot(paa_dataset_inv[0].ravel(), "b-") plt.title("PAA: " + stockCode) graph_idx = graph_idx + 1 plt.subplot(len(pos_relatedStock), 4, graph_idx) # Then SAX plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
ts = ts - np.mean(ts) ts /= np.std(ts, ddof=1) print('length of ts', len(ts)) n_sax_symbols_avg = 8 n_sax_symbols_slope = 8 n_paa_segments = 10 # 1d-SAX transform one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope, sigma_l=np.sqrt(0.03 / (np.floor(len(ts) / n_paa_segments)))) one_d_sax_dataset_inv = one_d_sax.inverse_transform( one_d_sax.fit_transform(ts)) # Our oneD_SAX width = len(ts) // n_paa_segments onedsax = oneD_SAX(w=width, k_slope=n_sax_symbols_slope, k_intercept=n_sax_symbols_avg) onedsax_ts = onedsax.transform(ts) recon_onedsax = onedsax.inverse_transform(onedsax_ts) # plot plt.figure() plt.plot(ts, "b-", alpha=0.4) plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-") plt.plot(recon_onedsax, 'r--') plt.legend([
# SAX transform n_sax_symbols = 256 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset)) print("a") # 1d-SAX transform n_sax_symbols_avg = 8 n_sax_symbols_slope = 8 one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) transformed_data = one_d_sax.fit_transform(dataset) one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data) plt.figure() plt.subplot(2, 2, 1) # First, raw time series plt.plot(dataset[0].ravel(), "b-") plt.title("Raw time series") plt.subplot(2, 2, 2) # Second, PAA plt.plot(dataset[0].ravel(), "b-", alpha=0.4) plt.plot(paa_dataset_inv[0].ravel(), "b-") plt.title("PAA") plt.subplot(2, 2, 3) # Then SAX plt.plot(dataset[0].ravel(), "b-", alpha=0.4) plt.plot(sax_dataset_inv[0].ravel(), "b-")
# PAA transform (and inverse transform) of the data n_paa_segments = 10 paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments) paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset)) df['paa'] = paa_dataset_inv[0] # SAX transform n_sax_symbols = 4 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset)) df['sax'] = sax_dataset_inv[0] # 1d-SAX transform n_sax_symbols_avg = 8 n_sax_symbols_slope = 8 one_d_sax = OneD_SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) one_d_sax_dataset_inv = one_d_sax.inverse_transform(one_d_sax.fit_transform(dataset)) df['one_dsax'] = one_d_sax_dataset_inv[0] df_list.append(df[['name','code','date','price','raw','paa','sax','one_dsax','Sector_main','sector']]) except ZeroDivisionError: pass #df_list df = pd.concat(df_list) df.to_csv('Stock_paa_sax.csv',index=False,sep=',') df
# Idea 4: use techniques such as PAA,
# then cluster the result with libraries such as sklearn.
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation
import time
from sklearn.cluster import MiniBatchKMeans, KMeans, DBSCAN, SpectralClustering, Birch
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score

# 1d-SAX: encode `stdData` and decode it again to obtain the approximation.
n_paa_segments = 40
n_sax_symbols_avg = 30
n_sax_symbols_slope = 30
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.inverse_transform(
    one_d_sax.fit_transform(stdData))

# tslearn returns a 3-D dataset, while the scikit-learn estimator and metric
# below expect 2-D (n_samples, n_features) input. Flatten each series into a
# single feature row; this is a no-op if the data is already 2-D.
transformed_data = transformed_data.reshape(transformed_data.shape[0], -1)

n_cluster = 100

# KMeans result
# Hyperparameter: the value of k
s = time.time()
km = KMeans(n_clusters=n_cluster, random_state=0)
y_pre = km.fit_predict(transformed_data)
e = time.time()
print(e - s, "s")  # wall-clock time spent in fit_predict
print(davies_bouldin_score(transformed_data, y_pre))
# Fragment: per-stock processing for column `i` of `df_red` - rescale, compute
# the PAA / SAX / 1d-SAX approximations and draw the first three panels of a
# 2x2 figure. `i`, `scaler`, `df_red`, `n_paa_segments`, `n_sax_symbols`,
# `n_sax_symbols_avg` and `n_sax_symbols_slope` come from code outside this
# fragment.
print("stockname" + str(i))
# NOTE(review): df_red[[i]] is a single-column 2-D frame; confirm the scaler
# treats it as one series rather than one series per row.
scaleddata = scaler.fit_transform(df_red[[i]])
#print(scaleddata)

# PAA transform (and inverse transform) of the data
paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))

# SAX transform
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))

# 1d-SAX transform
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(
    one_d_sax.fit_transform(scaleddata))

plt.figure()

# First, raw time series
plt.subplot(2, 2, 1)
plt.plot(scaleddata[0].ravel(), "b-")
plt.title("Raw time series")

# Second, PAA
plt.subplot(2, 2, 2)
plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)  # faded original for reference
plt.plot(paa_dataset_inv[0].ravel(), "b-")
plt.title("PAA")

#SAX plot
plt.subplot(2, 2, 3)  # Then SAX
plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
plt.plot(sax_dataset_inv[0].ravel(), "b-")
plt.title("SAX, %d symbols" % n_sax_symbols)
# For every stock column named in `listnew`: rescale the column, compute its
# PAA / SAX / 1d-SAX approximations and show a two-panel figure (raw series
# next to the 1d-SAX reconstruction). `listnew`, `df_red`, `scaler`,
# `n_paa_segments` and `n_sax_symbols` come from code outside this fragment.
n_sax_symbols_avg = 10   # 1d-SAX alphabet size for segment averages
n_sax_symbols_slope = 6  # 1d-SAX alphabet size for segment slopes

for i in listnew:
    records = len(df_red[[i]])  # not used again within this fragment
    print("stockname"+str(i))
    # NOTE(review): df_red[[i]] is a single-column 2-D frame; confirm the
    # scaler treats it as one series rather than one series per row.
    scaleddata = scaler.fit_transform(df_red[[i]])
    #print(scaleddata)

    # PAA transform (and inverse transform); not plotted in this fragment
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))

    # SAX transform; not plotted in this fragment
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))

    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                                    alphabet_size_avg=n_sax_symbols_avg,
                                                    alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(one_d_sax.fit_transform(scaleddata))

    plt.figure()

    # First, raw time series
    plt.subplot(1, 2, 1)
    plt.plot(scaleddata[0].ravel(), "b-")
    plt.title("Raw time series")
    # `i` is concatenated directly, so it must be a string here.
    plt.suptitle('Stockname: ' + i,fontsize=16)

    plt.subplot(1, 2, 2)  # Finally, 1d-SAX
    plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)  # faded original for reference
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (n_sax_symbols_avg * n_sax_symbols_slope,
                                              n_sax_symbols_avg,
                                              n_sax_symbols_slope))

    plt.tight_layout()
    # Adjusted after tight_layout: wider gap between panels, room on top
    # for the suptitle.
    plt.subplots_adjust(wspace=0.8, top=0.8)
    plt.show()
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(lc_nor)) # SAX transform n_sax_symbols = 25 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols) sax_dataset_inv = sax.inverse_transform(sax.fit_transform(lc_nor)) # 1d-SAX transform n_sax_symbols_avg = 5 n_sax_symbols_slope = 5 one_d_sax = OneD_SymbolicAggregateApproximation( n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg, alphabet_size_slope=n_sax_symbols_slope) transformed_data = one_d_sax.fit_transform(lc_nor) one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data) #dynamic binning lc_nor_list = list(lc_nor[0].ravel()) corePlot = sketchDyBinService(windowSize=n_paa_segments, initialBin=3, isOnline=False) sketchInstances = corePlot.sketchMode(instances=lc_nor_list) print("a") plt.figure() plt.subplot(2, 2, 1) # First, raw time series plt.plot(timestamps,lc_nor[0].ravel(), "b-") plt.title("Raw time series")