示例#1
0
def idijktest_euclidean():
    """Plot the best Euclidean sliding-window matches for an idijk query.

    Loads the ``values`` series from a hard-coded weather-station ``.mat``
    file, smooths it with ``publib.smooth``, and then searches samples
    0-4999 for the ``n`` windows closest to the query (samples 5200-5499)
    using ``bruteforce_sliding_windows`` with ``euclidean_dist``.

    The query is drawn in the first subplot and every returned candidate in
    its own subplot below it.  Each candidate is read as an indexable pair:
    ``candidate[0]`` is the start position inside the searched sequence and
    ``candidate[1]`` is the reported distance.

    Takes no arguments and returns nothing; it blocks on ``plt.show`` until
    the figure window is closed.
    """
    smooth_window_len = 5
    n = 3              # number of nearest neighbours requested
    win_interval = 10  # passed to bruteforce_sliding_windows as the window step

    # load idijk data
    # NOTE: machine-specific absolute path; the file must exist locally.
    mat = scipy.io.loadmat('/home/zhe/Dropbox/TH/DDSC/Matlab/data/weatherstation_east-Windspeed')

    # Only the measurement values are needed for matching.
    data = publib.smooth(mat['values'][0], window_len=smooth_window_len)

    # Go through tolist() to obtain plain numpy arrays for the search.
    sequence = np.array(data[0:5000].tolist())
    query = np.array(data[5200:5500].tolist())
    query_len = len(query)

    knn_candidates_euclidean = bruteforce_sliding_windows(query, sequence, win_interval, n, euclidean_dist)

    # Size the grid from what was actually returned (one row for the query
    # plus one per candidate) so the subplot indices below always fit.
    n_rows = len(knn_candidates_euclidean) + 1

    # plot matching results
    fig = plt.figure()
    ax = plt.subplot(n_rows, 1, 1)
    ax.plot(query, linewidth=1, color='r')
    ax.set_xlim(0, query_len)
    ax.set_title("query time series" )

    for i, candidate in enumerate(knn_candidates_euclidean):
        match_str_startpos = candidate[0]
        dist = candidate[1]
        match_str_endpos = match_str_startpos + query_len

        ax = plt.subplot(n_rows, 1, i + 2)
        # Plot against the absolute sample positions of the match.
        ax.plot(range(match_str_startpos, match_str_endpos),
                sequence[match_str_startpos:match_str_endpos],
                linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
        ax.set_xlim(match_str_startpos, match_str_endpos)

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
示例#2
0
def test_tno_data_matching(data, smooth_win=-1):
    """Find and plot the Euclidean nearest matches of a fixed query window.

    The query is the slice of ``data`` from '2011-10-17 14:31' to
    '2011-10-20 02:30'; the searched sequence is the slice from
    '2011-10-20 09:00' to '2011-12-20 09:00'.  The ``k = 3`` best windows
    are obtained from ``bruteforce_sliding_windows`` with ``euclidean_dist``
    and a window interval of 60, and the search is timed with ``time.time``.

    Args:
        data: Time series to search.  Presumably a pandas Series with a
            datetime index (it is sliced with date strings and its
            ``.index`` / ``.tolist()`` are used) -- confirm against callers.
        smooth_win: Window length handed to ``publib.smooth``.  Smoothing is
            applied to both query and sequence only when this is > 0.

    Returns:
        None.  Diagnostic information is printed and a matplotlib figure is
        shown; ``plt.show()`` is called before the final length printouts.
    """
    print(type(data.index))

    sequence_ts = data['2011-10-20 09:00':'2011-12-20 09:00']
    sequence = np.array(sequence_ts.tolist())
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    print(query_ts.head(20))
    print(type(query_ts.index))
    print(len(query_ts))

    query = np.array(query_ts.tolist())
    print(len(query))

    if smooth_win > 0:
        query = publib.smooth(query, smooth_win)
        # Rebuild the indexed query so the plotted curve is the smoothed one.
        query_ts = pd.Series(query, index=query_ts.index)
        sequence = publib.smooth(sequence, smooth_win)

    k = 3
    win_interval = 60

    start_t = time.time()
    knn_candidates_euclidean = bruteforce_sliding_windows(query, sequence, win_interval, k, euclidean_dist)
    time_euclidean = time.time() - start_t
    print("Euclidean takes: " + str(time_euclidean))

    # One row for the query plus one per returned candidate.
    n_rows = len(knn_candidates_euclidean) + 1
    query_len = len(query)

    xfmt = md.DateFormatter('%H')
    plt.figure(1)
    ax = plt.subplot(n_rows, 1, 1)
    plt.title("query time series: from %s to %s" % (str(query_ts.index[0]), str(query_ts.index[-1])), fontsize=10)
    ax.xaxis.set_major_formatter(xfmt)
    query_ts.plot(color='g')

    for i, candidate in enumerate(knn_candidates_euclidean):
        ax = plt.subplot(n_rows, 1, i + 2)
        # Each candidate is read as (start position in sequence, distance).
        pos = candidate[0]
        dist = candidate[1]
        end_pos = pos + query_len

        # Matches are plotted against sample positions, not timestamps.
        ax.plot(range(pos, end_pos), sequence[pos:end_pos], linewidth=1, color='b')
        ax.set_xlim(pos, end_pos)
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
    plt.show()

    print("length of query ts: " + str(len(query)))
    print("length of sequence: " + str(len(sequence)))