예제 #1
0
def saa_pax(dataset, title):
    """
    Show the graph of the PAA, SAX and 1d-SAX approximations of time series data.

    The input is rescaled with ``TimeSeriesScalerMeanVariance(mu=0., std=1.)``.
    Each approximation is then fitted, transformed and inverse-transformed so
    it can be drawn on top of the rescaled series in a 2x2 grid of subplots.
    Only the first series (index 0) of the rescaled data is plotted.

    :param dataset: time series of a stock
    :param title: text appended to the "Raw time series" and "PAA" subplot
        titles
    :return: None; the figure is displayed with ``plt.show()``
    """
    scaler = TimeSeriesScalerMeanVariance(mu=0., std=1.)  # Rescale time series
    dataset = scaler.fit_transform(dataset)

    # PAA transform (and inverse transform) of the data
    n_paa_segments = 10
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))

    # SAX transform
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    raw_series = dataset[0].ravel()

    plt.figure()
    plt.subplot(2, 2, 1)  # First, raw time series
    plt.plot(raw_series, "b-")
    plt.title("Raw time series " + title)

    # Remaining panels share one layout: faded raw series underneath, the
    # reconstructed approximation on top.
    overlays = (
        (2, paa_dataset_inv, "PAA " + title),
        (3, sax_dataset_inv, "SAX, %d symbols" % n_sax_symbols),
        (4, one_d_sax_dataset_inv,
         "1d-SAX, %d symbols (%dx%d)" %
         (n_sax_symbols_avg * n_sax_symbols_slope, n_sax_symbols_avg,
          n_sax_symbols_slope)),
    )
    for position, reconstruction, heading in overlays:
        plt.subplot(2, 2, position)
        plt.plot(raw_series, "b-", alpha=0.4)
        plt.plot(reconstruction[0].ravel(), "b-")
        plt.title(heading)

    plt.tight_layout()
    plt.show()
예제 #2
0
 def sax(self, data):
     """Return the 1d-SAX reconstruction of ``data`` reshaped to two dimensions.

     ``data`` is encoded with 10 segments and 8-symbol alphabets for both the
     averages and the slopes, decoded again with ``inverse_transform``, and
     the decoded array is reshaped to its first two dimensions.
     """
     encoder = OneD_SymbolicAggregateApproximation(
         n_segments=10,
         alphabet_size_avg=8,
         alphabet_size_slope=8)
     encoded = encoder.fit_transform(data)
     reconstructed = encoder.inverse_transform(encoded)
     # Keep only the first two axes' sizes as the target shape.
     target_shape = (reconstructed.shape[0], reconstructed.shape[1])
     return np.reshape(reconstructed, target_shape)
예제 #3
0
def genList1D_SAX(instances_nor,
                  windowSize,
                  timestamp,
                  n_sax_symbols_avg=5,
                  n_sax_symbols_slope=5):
    """Build a sketch record from the 1d-SAX reconstruction of ``instances_nor``.

    The data is encoded with ``windowSize`` segments and the given alphabet
    sizes, decoded with ``inverse_transform``, and the first reconstructed
    series is flattened into a list.

    :param instances_nor: data passed to ``fit_transform``
    :param windowSize: number of 1d-SAX segments
    :param timestamp: value returned unchanged under the ``"timestamp"`` key
    :param n_sax_symbols_avg: alphabet size used for segment averages
    :param n_sax_symbols_slope: alphabet size used for segment slopes
    :return: dict with keys ``"sketchInstances"`` and ``"timestamp"``
    """
    encoder = OneD_SymbolicAggregateApproximation(
        n_segments=windowSize,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    reconstructed = encoder.inverse_transform(
        encoder.fit_transform(instances_nor))

    # Only the first series of the reconstruction is reported.
    first_series = reconstructed[0].ravel()
    return dict(sketchInstances=list(first_series), timestamp=timestamp)
예제 #4
0
def get_sax_transformation(df,
                           features_to_compute='probability',
                           segments=10,
                           symbols=8):
    """
    Add a ``sax`` column holding a per-station 1d-SAX reconstruction.

    The dataframe is re-sorted by station / ts and processed one station at a
    time. For each station the first ``n_paa_segments * segments`` values of
    the feature go through 1d-SAX (``fit_transform`` followed by
    ``inverse_transform``). If the time series can't be divided evenly, the
    remaining last values are copied over unchanged so that every row of the
    station receives a value.

    df : DataFrame - after ``reset_index()`` it must expose ``station`` and
        ``ts`` columns
    features_to_compute : string - column's name of the feature to aggregate
    segments : int - scales the number of 1d-SAX segments per station:
        ``round(len(station rows) * segments / 100 - 0.5)``
    symbols : int - scales both 1d-SAX alphabet sizes (average and slope):
        ``round(len(station rows) * symbols / 100 - 0.5)``

    Returns the dataframe sorted by ts / station and indexed by ``ts``, with
    the additional float column ``sax``. An empty dataframe yields an empty
    ``sax`` column.
    """
    df = df.reset_index()
    df = df.sort_values(['station', 'ts'])

    sax_list_result = []
    for station in df.station.unique():
        # Plain ndarray so the slices below are purely positional.
        values = df.loc[df.station == station, features_to_compute].values

        # NOTE(review): round(x - 0.5) uses banker's rounding on Python 3, so
        # an exact integer x yields x or x - 1 depending on parity. Kept
        # as-is to preserve existing results.
        n_paa_segments = round((len(values) * segments / 100) - 0.5)
        alphabet_size = round((len(values) * symbols / 100) - 0.5)
        one_d_sax = OneD_SymbolicAggregateApproximation(
            n_segments=n_paa_segments,
            alphabet_size_avg=alphabet_size,
            alphabet_size_slope=alphabet_size)

        cutoff = n_paa_segments * segments
        sax_list_result.extend(
            one_d_sax.inverse_transform(
                one_d_sax.fit_transform(values[:cutoff])).ravel())
        # Always append this station's leftover values. Comparing the
        # cumulative result length with one station's length (as before) is
        # only meaningful for the first station; an empty tail is a no-op.
        sax_list_result.extend(values[cutoff:])

    df['sax'] = sax_list_result
    df['sax'] = df['sax'].astype('float')
    df = df.sort_values(['ts', 'station'])
    df = df.set_index('ts')
    return df
    # NOTE(review): excerpt from a longer plotting routine. `dataset`,
    # `n_paa_segments`, `paa_dataset_inv`, `graph_idx`, `pos_relatedStock` and
    # `stockCode` are not assigned in this span -- confirm against the full
    # source.

    # SAX transform (encode, then decode so it can be drawn over the data)
    n_sax_symbols = 8
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                         alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))

    # 1d-SAX transform
    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(dataset))

    # The subplot grid has 4 columns and one row per entry of
    # pos_relatedStock; graph_idx is advanced by one before each panel.
    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # First, raw time series
    plt.plot(dataset[0].ravel(), "b-")
    plt.title("Raw time series: " + stockCode)

    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # Second, PAA
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
    plt.plot(paa_dataset_inv[0].ravel(), "b-")
    plt.title("PAA: " + stockCode)

    graph_idx = graph_idx + 1
    plt.subplot(len(pos_relatedStock), 4, graph_idx)  # Then SAX
    plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
예제 #6
0
    # Standardize the series: subtract the mean, then divide by the sample
    # standard deviation (ddof=1).
    ts = ts - np.mean(ts)
    ts /= np.std(ts, ddof=1)
    print('length of ts', len(ts))

    n_sax_symbols_avg = 8
    n_sax_symbols_slope = 8
    n_paa_segments = 10

    # 1d-SAX transform
    # sigma_l is passed explicitly as sqrt(0.03 / L), where L is the floored
    # number of points per segment.
    one_d_sax = OneD_SymbolicAggregateApproximation(
        n_segments=n_paa_segments,
        alphabet_size_avg=n_sax_symbols_avg,
        alphabet_size_slope=n_sax_symbols_slope,
        sigma_l=np.sqrt(0.03 / (np.floor(len(ts) / n_paa_segments))))
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(
        one_d_sax.fit_transform(ts))

    # Our oneD_SAX
    # NOTE(review): oneD_SAX is not defined in the visible code; presumably
    # the project's own implementation. It gets the same alphabet sizes and a
    # window of len(ts) // n_paa_segments points.
    width = len(ts) // n_paa_segments
    onedsax = oneD_SAX(w=width,
                       k_slope=n_sax_symbols_slope,
                       k_intercept=n_sax_symbols_avg)
    onedsax_ts = onedsax.transform(ts)
    recon_onedsax = onedsax.inverse_transform(onedsax_ts)

    # plot
    # Faded blue: standardized series; solid blue: first series of the
    # one_d_sax reconstruction; dashed red: the oneD_SAX reconstruction.
    plt.figure()
    plt.plot(ts, "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.plot(recon_onedsax, 'r--')
    plt.legend([
예제 #7
0
# NOTE(review): script excerpt. `n_paa_segments`, `dataset` and
# `paa_dataset_inv` are not assigned in this span -- they must come from
# earlier code that is not visible here.

# SAX transform
n_sax_symbols = 256
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
print("a")  # prints a fixed marker (presumably a progress/debug trace)

# 1d-SAX transform
n_sax_symbols_avg = 8
n_sax_symbols_slope = 8
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.fit_transform(dataset)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data)

# 2x2 figure; each approximation panel draws the first series faded
# (alpha=0.4) with its reconstruction on top.
plt.figure()
plt.subplot(2, 2, 1)  # First, raw time series
plt.plot(dataset[0].ravel(), "b-")
plt.title("Raw time series")

plt.subplot(2, 2, 2)  # Second, PAA
plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
plt.plot(paa_dataset_inv[0].ravel(), "b-")
plt.title("PAA")

plt.subplot(2, 2, 3)  # Then SAX
plt.plot(dataset[0].ravel(), "b-", alpha=0.4)
plt.plot(sax_dataset_inv[0].ravel(), "b-")
        
        # PAA transform (and inverse transform) of the data
        n_paa_segments = 10
        paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
        paa_dataset_inv = paa.inverse_transform(paa.fit_transform(dataset))
        df['paa'] = paa_dataset_inv[0]
        
        # SAX transform
        n_sax_symbols = 4
        sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
        sax_dataset_inv = sax.inverse_transform(sax.fit_transform(dataset))
        df['sax'] = sax_dataset_inv[0]
        
        # 1d-SAX transform
        n_sax_symbols_avg = 8
        n_sax_symbols_slope = 8
        one_d_sax = OneD_SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg,
                                                        alphabet_size_slope=n_sax_symbols_slope)
                                                        one_d_sax_dataset_inv = one_d_sax.inverse_transform(one_d_sax.fit_transform(dataset))
                                                        df['one_dsax'] = one_d_sax_dataset_inv[0]
                                                        
        df_list.append(df[['name','code','date','price','raw','paa','sax','one_dsax','Sector_main','sector']])

    except ZeroDivisionError:
        pass

# Concatenate the frames collected in df_list and write them to
# 'Stock_paa_sax.csv' (comma-separated, without the index column).
#df_list
df = pd.concat(df_list)
df.to_csv('Stock_paa_sax.csv',index=False,sep=',')
# Bare expression: no effect when run as a script (presumably kept so a
# notebook cell displays the frame).
df
예제 #9
0
# Idea 4: use PAA and related techniques,
# then use libraries such as sklearn.
from tslearn.piecewise import PiecewiseAggregateApproximation
from tslearn.piecewise import SymbolicAggregateApproximation, OneD_SymbolicAggregateApproximation
import time
# 1dSAX
# NOTE(review): `stdData` is not assigned in this span; it must come from
# earlier code that is not visible here.
n_paa_segments = 40
n_sax_symbols_avg = 30
n_sax_symbols_slope = 30
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
# Despite its name, transformed_data holds the inverse-transformed
# (reconstructed) series, not the symbolic encoding.
transformed_data = one_d_sax.inverse_transform(
    one_d_sax.fit_transform(stdData))

from sklearn.cluster import MiniBatchKMeans, KMeans, DBSCAN, SpectralClustering, Birch
from sklearn.metrics import calinski_harabasz_score, davies_bouldin_score

n_cluster = 100

# KMeans result
# Hyperparameter: the value of k
# NOTE(review): tslearn reconstructions are presumably 3-D (n_ts, sz, d)
# while scikit-learn estimators expect 2-D input -- confirm the shape of
# transformed_data before it reaches fit_predict / davies_bouldin_score.
s = time.time()
km = KMeans(n_clusters=n_cluster, random_state=0)
y_pre = km.fit_predict(transformed_data)
e = time.time()
# Elapsed wall-clock seconds for the clustering, then the Davies-Bouldin score.
print(e - s, "s")
print(davies_bouldin_score(transformed_data, y_pre))
예제 #10
0
 # NOTE(review): loop-body excerpt. `i`, `scaler`, `df_red`,
 # `n_paa_segments`, `n_sax_symbols`, `n_sax_symbols_avg` and
 # `n_sax_symbols_slope` are assigned in code that is not visible here.
 print("stockname" + str(i))
 # Rescale the single column selected by i.
 scaleddata = scaler.fit_transform(df_red[[i]])
 #print(scaleddata)
 # PAA transform (and inverse transform)
 paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
 paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))
 # SAX transform
 sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                      alphabet_size_avg=n_sax_symbols)
 sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))
 # 1d-SAX transform
 one_d_sax = OneD_SymbolicAggregateApproximation(
     n_segments=n_paa_segments,
     alphabet_size_avg=n_sax_symbols_avg,
     alphabet_size_slope=n_sax_symbols_slope)
 one_d_sax_dataset_inv = one_d_sax.inverse_transform(
     one_d_sax.fit_transform(scaleddata))
 # 2x2 figure; only element 0 of each array is plotted.
 plt.figure()
 # First, raw time series
 plt.subplot(2, 2, 1)
 plt.plot(scaleddata[0].ravel(), "b-")
 plt.title("Raw time series")
 # Second, PAA
 plt.subplot(2, 2, 2)
 plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
 plt.plot(paa_dataset_inv[0].ravel(), "b-")
 plt.title("PAA")
 #SAX plot
 plt.subplot(2, 2, 3)  # Then SAX
 plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
 plt.plot(sax_dataset_inv[0].ravel(), "b-")
 plt.title("SAX, %d symbols" % n_sax_symbols)
예제 #11
0
# Per-stock comparison of the rescaled series with its 1d-SAX reconstruction.
# NOTE(review): `listnew`, `df_red`, `scaler`, `n_paa_segments` and
# `n_sax_symbols` are not assigned in this span; they must come from earlier
# code that is not visible here.
n_sax_symbols_avg = 10
n_sax_symbols_slope = 6
for i in listnew:
    # Row count of the selected column; not read again in the visible code.
    records = len(df_red[[i]])
    print("stockname"+str(i))      
    scaleddata = scaler.fit_transform(df_red[[i]])
    #print(scaleddata)
    # PAA and SAX reconstructions are computed here but not plotted below.
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    paa_dataset_inv = paa.inverse_transform(paa.fit_transform(scaleddata))
    # SAX transform
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    sax_dataset_inv = sax.inverse_transform(sax.fit_transform(scaleddata))
    # 1d-SAX transform
    one_d_sax = OneD_SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols_avg,
                                                    alphabet_size_slope=n_sax_symbols_slope)
    one_d_sax_dataset_inv = one_d_sax.inverse_transform(one_d_sax.fit_transform(scaleddata))
    # One figure per stock with two side-by-side panels.
    plt.figure()
    # First, raw time series
    plt.subplot(1, 2, 1)  
    plt.plot(scaleddata[0].ravel(), "b-")
    plt.title("Raw time series")
    # The string concatenation below requires i to be a str.
    plt.suptitle('Stockname: ' + i,fontsize=16)
    plt.subplot(1, 2, 2)  # Finally, 1d-SAX
    plt.plot(scaleddata[0].ravel(), "b-", alpha=0.4)
    plt.plot(one_d_sax_dataset_inv[0].ravel(), "b-")
    plt.title("1d-SAX, %d symbols (%dx%d)" % (n_sax_symbols_avg * n_sax_symbols_slope,
                                              n_sax_symbols_avg,
                                              n_sax_symbols_slope))
    # tight_layout first, then widen the gap between panels and lower the
    # top edge of the subplot area (wspace=0.8, top=0.8).
    plt.tight_layout()
    plt.subplots_adjust(wspace=0.8, top=0.8)
    plt.show()
예제 #12
0
# NOTE(review): script excerpt. `paa`, `lc_nor`, `n_paa_segments`,
# `timestamps` and `sketchDyBinService` are not defined in this span; they
# must come from earlier code that is not visible here.
paa_dataset_inv = paa.inverse_transform(paa.fit_transform(lc_nor))

# SAX transform
n_sax_symbols = 25
sax = SymbolicAggregateApproximation(n_segments=n_paa_segments,
                                     alphabet_size_avg=n_sax_symbols)
sax_dataset_inv = sax.inverse_transform(sax.fit_transform(lc_nor))

# 1d-SAX transform
n_sax_symbols_avg = 5
n_sax_symbols_slope = 5
one_d_sax = OneD_SymbolicAggregateApproximation(
    n_segments=n_paa_segments,
    alphabet_size_avg=n_sax_symbols_avg,
    alphabet_size_slope=n_sax_symbols_slope)
transformed_data = one_d_sax.fit_transform(lc_nor)
one_d_sax_dataset_inv = one_d_sax.inverse_transform(transformed_data)

# dynamic binning
# The first series is flattened to a plain list and handed to the sketch
# service, configured with the same window size as the PAA segment count.
lc_nor_list = list(lc_nor[0].ravel())
corePlot = sketchDyBinService(windowSize=n_paa_segments,
                              initialBin=3, isOnline=False)
sketchInstances = corePlot.sketchMode(instances=lc_nor_list)
print("a")  # prints a fixed marker (presumably a progress/debug trace)



plt.figure()
plt.subplot(2, 2, 1)  # First, raw time series
plt.plot(timestamps,lc_nor[0].ravel(), "b-")
plt.title("Raw time series")