def idijktest_euclidean(mat_path=None):
    """Plot the nearest Euclidean matches of a query window in a .mat series.

    Loads the ``values`` series from a MATLAB ``.mat`` file, smooths it with
    ``publib.smooth``, takes samples 0..4999 as the search sequence and
    samples 5200..5499 as the query, and runs ``bruteforce_sliding_windows``
    with ``euclidean_dist``. The query (red) and every returned candidate
    (blue) are drawn as stacked subplots; the call blocks until the figure
    is closed.

    Args:
        mat_path: Optional path of the ``.mat`` file to load. When ``None``
            the original hard-coded weather-station file is used, so
            existing zero-argument callers behave as before.
    """
    if mat_path is None:
        mat_path = ('/home/zhe/Dropbox/TH/DDSC/Matlab/data/'
                    'weatherstation_east-Windspeed')

    smooth_window_len = 5
    k = 3
    win_interval = 10

    # Load the raw series and smooth it before slicing.
    mat = scipy.io.loadmat(mat_path)
    data = publib.smooth(mat['values'][0], window_len=smooth_window_len)

    sequence = np.array(data[0:5000].tolist())
    query = np.array(data[5200:5500].tolist())
    query_len = len(query)

    candidates = bruteforce_sliding_windows(
        query, sequence, win_interval, k, euclidean_dist)

    # One row for the query plus one per candidate actually returned, so the
    # grid always matches the number of subplots drawn below.
    n_rows = len(candidates) + 1

    fig = plt.figure()
    ax = plt.subplot(n_rows, 1, 1)
    ax.plot(query, linewidth=1, color='r')
    ax.set_xlim(0, query_len)
    ax.set_title("query time series")

    for i, candidate in enumerate(candidates):
        # Each candidate is indexed as (start position, distance).
        start = candidate[0]
        dist = candidate[1]
        end = start + query_len

        ax = plt.subplot(n_rows, 1, i + 2)
        ax.plot(range(start, end), sequence[start:end],
                linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
        ax.set_xlim(start, end)

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
def test_tno_data_matching(data, smooth_win=-1):
    """Find and plot the nearest Euclidean matches of a fixed query window.

    Slices ``data`` by two hard-coded timestamp ranges: the search sequence
    ('2011-10-20 09:00' to '2011-12-20 09:00') and the query
    ('2011-10-17 14:31' to '2011-10-20 02:30'). Both are optionally smoothed
    with ``publib.smooth``, then ``bruteforce_sliding_windows`` is run with
    ``euclidean_dist``. Diagnostic information and the search time are
    printed, and the query plus each returned candidate are shown as stacked
    subplots.

    Args:
        data: Series-like object that supports slicing by timestamp strings
            and exposes ``.index``; the slices must provide ``.tolist()``,
            ``.head()`` and ``.plot()``. Presumably a pandas Series with a
            DatetimeIndex -- TODO confirm against callers.
        smooth_win: Window length handed to ``publib.smooth``. Smoothing is
            skipped unless this is greater than 0.
    """
    print(type(data.index))
    sequence_ts = data['2011-10-20 09:00':'2011-12-20 09:00']
    sequence = np.array(sequence_ts.tolist())

    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    print(query_ts.head(20))
    print(type(query_ts.index))
    print(len(query_ts))
    query = np.array(query_ts.tolist())
    print(len(query))

    if smooth_win > 0:
        query = publib.smooth(query, smooth_win)
        # Re-wrap the smoothed query so it is still plotted against time.
        query_ts = pd.Series(query, index=query_ts.index)
        sequence = publib.smooth(sequence, smooth_win)

    k = 3
    win_interval = 60

    # NOTE: LB_DTW_sliding_windows(query, sequence, win_interval, k,
    # win_interval) was previously tried here as a DTW-based alternative.
    start_t = time.time()
    candidates = bruteforce_sliding_windows(
        query, sequence, win_interval, k, euclidean_dist)
    time_euclidean = time.time() - start_t
    print("Euclidean takes: " + str(time_euclidean))

    n_rows = len(candidates) + 1
    query_len = len(query)
    xfmt = md.DateFormatter('%H')

    # Top row: the query itself, plotted against its time index.
    plt.figure(1)
    ax = plt.subplot(n_rows, 1, 1)
    plt.title("query time series: from %s to %s"
              % (str(query_ts.index[0]), str(query_ts.index[-1])),
              fontsize=10)
    ax.xaxis.set_major_formatter(xfmt)
    query_ts.plot(color='g')

    # Remaining rows: each candidate, plotted against sample position.
    for i, candidate in enumerate(candidates):
        ax = plt.subplot(n_rows, 1, i + 2)
        pos = candidate[0]
        dist = candidate[1]
        end = pos + query_len
        ax.plot(range(pos, end), sequence[pos:end], linewidth=1, color='b')
        ax.set_xlim(pos, end)
        ax.set_title("Matching " + str(i) + ": dist=" + str(dist))
    plt.show()

    print("length of query ts: " + str(len(query)))
    print("length of sequence: " + str(len(sequence)))