def monotonousSigTest():
    """Segment, encode and plot a synthetic monotone test signal.

    Builds a random signal with ``monotone_randomsignal``, runs the
    ``SSSTSR_Class`` segmentation (``minSize=50``) and symbol encoding on it,
    plots the fitted segments with ``plot_segts_fit`` and prints the resulting
    symbol list together with the joined ``shapelist`` string.

    NOTE(review): ``get_symbol_list`` is called here on the assumption that it
    is a regular method like the other ``get_*`` accessors used below; the
    previous code passed/printed the bound method object itself.
    """
    # Generate a synthetic signal composed of 10 basic shapes with a total
    # length of 3000 samples.
    test_ts, _, _ = monotone_randomsignal(10, 3000, 200)

    a = SSSTSR_Class(ts=test_ts)
    a.build_seg(plotSwitch=False, minSize=50)
    a.seg_encode()

    # Fetch the symbols once so the plot and the printout show the same data.
    symbol_list = a.get_symbol_list()

    # Wide, flat canvas; plot_segts_fit draws onto the current figure.
    plt.figure(figsize=(24, 4))
    plot_segts_fit(ts=a.get_ts(),
                   seg_ts=a.get_seg_ts(),
                   seg_fit=a.get_fit_list(),
                   imshow=True,
                   shapelist=symbol_list)

    print(symbol_list)
    print(''.join(a.shapelist))
def idijktest_segmentwise():
    """Run a segment-wise keyword search on the TNO data and plot the matches.

    The series returned by ``test_tno_data_prep()`` is split into a database
    part (everything from '2011-10-20 09:00' on) and a query part
    ('2011-10-17 14:31' to '2011-10-20 02:30').  Both are segmented and
    encoded with ``SSSTSR_Class`` (the query is built with
    ``scale_train_ts=a``), their words are printed, and
    ``segmentwise_match_invariance`` is asked for the ``n`` best candidates
    with ``offset_inv=True``.

    The result figure has ``n + 1`` rows: the query on top (one green line per
    segment, black vertical lines between segments) and one blue subplot per
    returned candidate.  The function blocks in ``plt.show(block=True)`` until
    the figure is closed.
    """
    min_size = 30
    smooth_window_len = 5
    zero_thresh = 0.0001
    n = 3              # number of best matches to search for and plot
    win_interval = 1

    data = test_tno_data_prep()
    # Alternative windows tried during experiments:
    #   database: '2011-03-20 09:00' .. '2012-03-10 08:00'
    #   query:    '2012-03-20 09:00' .. '2012-03-20 21:00'
    #             '2011-10-17 14:31' .. '2011-10-18 02:30'
    sequence = np.array(data['2011-10-20 09:00':].tolist())
    query = np.array(data['2011-10-17 14:31':'2011-10-20 02:30'].tolist())

    # Database representation; adjust smooth_window_len to get a different
    # level of smoothing.
    a = SSSTSR_Class(ts=sequence, smooth_window_len=smooth_window_len,
                     start=0, end=80000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    a.seg_encode()
    a.plot()
    print("Word of the database:\t" + a.get_word())
    print("The number of symbols:\t" + str(len(a.get_word())))
    print("-----------------------------------")

    # Query (keyword) representation, built with the database object passed
    # as scale_train_ts.
    b = SSSTSR_Class(ts=query, smooth_window_len=smooth_window_len,
                     zero_thresh=zero_thresh, scale_train_ts=a)
    b.build_seg(plotSwitch=False, minSize=min_size)
    b.seg_encode()
    b.plot()
    print("The keyword is \"" + b.get_word() + "\", contains "
          + str(len(b.get_word())) + " symbols")
    print("-----------------------------------")

    a.seg_enhance_represent()
    b.seg_enhance_represent()
    plt.show(block=False)

    print("Searching the best " + str(n) + " matching time series pieces...")

    # --- top row: the keyword, drawn segment by segment -------------------
    fig = plt.figure()
    ax = plt.subplot(n + 1, 1, 1)
    query_ts = b.get_smooth_ts()
    query_segs = b.get_seg_ts()
    for seg in query_segs:
        startpoint = seg[0]
        endpoint = seg[1]
        ax.plot(range(startpoint, endpoint), query_ts[startpoint:endpoint],
                linewidth=1, color='g')
    ax.set_xlim(query_segs[0][0], query_segs[-1][-1])
    # No line after the last segment: only interior boundaries are marked.
    _draw_segment_boundaries(ax, query_segs[:-1])
    ax.set_title("keyword time series: " + b.get_word())

    # --- remaining rows: one subplot per returned candidate ---------------
    db_ts = a.get_smooth_ts()
    db_segs = a.get_seg_ts()
    db_word = a.get_word()
    len_matched_word = len(b.enhance_seg)

    # Only offset invariance is switched on; per the original author's note
    # the matcher also knows longitude_inv, amplitude_inv and lineardrift_inv,
    # which are left at their defaults here.
    knn_candidates_euclidean = segmentwise_match_invariance(
        b.enhance_seg, a.enhance_seg, win_interval, n, offset_inv=True)

    for i, candidate in enumerate(knn_candidates_euclidean):
        pos = candidate[0]    # index of the first matched database segment
        dist = candidate[1]
        match_str_startpos = db_segs[pos][0]
        match_str_endpos = db_segs[pos + len_matched_word - 1][1]
        print(str(match_str_startpos) + " " + str(match_str_endpos))

        # Same (n + 1)-row grid as the keyword subplot above.
        ax = plt.subplot(n + 1, 1, i + 2)
        ax.plot(range(match_str_startpos, match_str_endpos),
                db_ts[match_str_startpos:match_str_endpos],
                linewidth=1, color='b')
        ax.set_xlim(match_str_startpos, match_str_endpos)
        _draw_segment_boundaries(ax, db_segs[pos:pos + len_matched_word])
        ax.set_title("Matching " + str(i) + ": "
                     + db_word[pos:pos + len_matched_word]
                     + ", dist=" + str(dist))

    # Finalise and show the figure once, after every subplot has been drawn.
    fig.subplots_adjust(hspace=1)
    plt.show(block=True)


def _draw_segment_boundaries(ax, segments):
    """Draw a thick black vertical line at the end of each segment on *ax*."""
    for seg in segments:
        # seg[1] is the exclusive end point, so the boundary sits half a
        # sample before it, i.e. in the middle between two segments.
        ax.axvline(seg[1] - 0.5, linewidth=2, color='k')
def idijktest():
    """Search a wind-speed recording for a keyword piece via Hamming matching.

    Loads ``mat['values'][0]`` from the weatherstation_east-Windspeed MATLAB
    file, encodes it with ``SSSTSR_Class`` (``start=0, end=5000``) as the
    database word, and encodes samples 5200..5399 of the same recording as the
    keyword.  ``hamming_match_best_effort`` is asked for the ``n`` best
    positions of the keyword word inside the database word; every match is
    printed and then plotted below the keyword in an ``n + 1`` row figure.
    The function blocks in ``plt.show(block=True)`` until the figure closes.

    A match at symbol position ``pos`` covers the database segments
    ``pos .. pos + len(keyword) - 1``; the printed word, the printed sample
    range and the plotted range all use that same span.

    NOTE: a later ``def idijktest`` in this file rebinds the name, so this
    Hamming variant is shadowed once the whole module has been executed.
    """
    min_size = 5
    smooth_window_len = 5
    zero_thresh = 0.001
    n = 3  # number of best matches to search for and plot

    # Load the idijk data.  Other recordings used during experiments live in
    # the same folder: weatherstation_east-Humidity, weatherstation_west-Rainfall,
    # weatherstation_east-Radiation, weatherstation_east-Temperature,
    # weatherstation_east-Winddirection.
    # Raw string: the Windows path contains backslashes that must stay literal.
    mat = scipy.io.loadmat(
        r'D:\Dropbox\TH\DDSC\Matlab\data\weatherstation_east-Windspeed')
    data = mat['values'][0]

    # Database representation; adjust smooth_window_len to get a different
    # level of smoothing.
    a = SSSTSR_Class(ts=data, smooth_window_len=smooth_window_len,
                     start=0, end=5000, zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    a.seg_encode()
    a.plot()
    print("Word of the database:\t" + a.get_word())
    print("The number of symbols:\t" + str(len(a.get_word())))
    print("-----------------------------------")

    # Keyword piece (other ranges tried: 1600..1800 and 5000..5200).
    # To probe noise robustness, feed
    # piece + np.random.randn(len(piece)) * 2 as ts instead.
    piece_range = range(5200, 5400)
    piece = [data[i] for i in piece_range]

    b = SSSTSR_Class(ts=piece, smooth_window_len=smooth_window_len,
                     zero_thresh=zero_thresh)
    b.build_seg(plotSwitch=True, minSize=min_size)
    b.seg_encode()
    b.plot()
    print("The keyword is \"" + b.get_word() + "\", contains "
          + str(len(b.get_word())) + " symbols")
    print("-----------------------------------")

    print("Searching the best " + str(n) + " matching time series pieces...")

    keyword = b.get_word()
    keyword_len = len(keyword)
    db_word = a.get_word()
    segs = a.get_seg_ts()
    ts = a.get_smooth_ts()

    # Hamming distance matching.
    match_pos_list, match_dist_list = hamming_match_best_effort(
        keyword, db_word, n)

    # Resolve each match once so the printout and the plot cannot disagree.
    # Pairing the two result lists (capped at n) also avoids indexing past the
    # end if fewer than n matches are returned.
    matches = []
    for pos, dist in list(zip(match_pos_list, match_dist_list))[:n]:
        match_str_startpos = segs[pos][0]
        # Last matched segment is pos + keyword_len - 1; its [1] entry is the
        # end of the matched sample range.
        match_str_endpos = segs[pos + keyword_len - 1][1]
        matched_word = db_word[pos:pos + keyword_len]
        matches.append((dist, match_str_startpos, match_str_endpos,
                        matched_word))

    for i, (dist, startpos, endpos, matched_word) in enumerate(matches):
        print("Matching " + str(i + 1) + ":\t" + matched_word +
              "[" + str(startpos) + ":" + str(endpos) + "] dist=" + str(dist))

    # Plot matching results: keyword on top, one row per match below.
    fig = plt.figure()
    ax = plt.subplot(n + 1, 1, 1)
    ax.plot(piece_range, b.get_smooth_ts(), linewidth=1, color='r')
    ax.set_title("keyword time series: " + keyword)

    for i, (dist, startpos, endpos, matched_word) in enumerate(matches):
        ax = plt.subplot(n + 1, 1, i + 2)
        ax.plot(range(startpos, endpos), ts[startpos:endpos],
                linewidth=1, color='b')
        ax.set_title("Matching " + str(i) + ": " + matched_word
                     + ", dist=" + str(dist))

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)
def idijktest():
    """Search the TNO series for a query piece using Levenshtein matching.

    The series from ``test_tno_data_prep()`` is split into a database part
    (from '2011-10-20 09:00' on, encoded with ``start=0, end=8000``) and a
    query part ('2011-10-17 14:31' to '2011-10-20 02:30').  Both are segmented
    and encoded with ``SSSTSR_Class``; ``leven_match`` is then asked for the
    ``n`` best occurrences of the query word in the database word.  Each match
    is printed (numbered from 1) and plotted below the query in an ``n + 1``
    row figure (titles numbered from 0).  Blocks in ``plt.show(block=True)``.

    NOTE: this definition rebinds ``idijktest``; an earlier function of the
    same name in this file is shadowed by it.
    """
    n = 3  # number of best matches to search for and plot
    min_size = 30
    smooth_window_len = 0
    zero_thresh = 0.01

    data = test_tno_data_prep()
    # Alternative windows tried during experiments:
    #   database: '2011-03-20 09:00' .. '2012-03-10 08:00'
    #   query:    '2012-03-20 09:00' .. '2012-03-20 21:00'
    sequence_ts = data['2011-10-20 09:00':]
    sequence = np.array(sequence_ts.tolist())
    query_ts = data['2011-10-17 14:31':'2011-10-20 02:30']
    query = np.array(query_ts.tolist())

    # Database representation; adjust smooth_window_len to get a different
    # level of smoothing.
    a = SSSTSR_Class(ts=sequence,
                     smooth_window_len=smooth_window_len,
                     start=0,
                     end=8000,
                     zero_thresh=zero_thresh)
    a.build_seg(plotSwitch=False, minSize=min_size)
    a.seg_encode()
    a.plot()
    database_word = a.get_word()
    print("Word of the database:\t" + database_word)
    print("The number of symbols:\t" + str(len(a.get_word())))
    print("-----------------------------------")

    # The query is built with a fixed zero_thresh of 0.05, not the
    # zero_thresh local used for the database.
    b = SSSTSR_Class(ts=query,
                     smooth_window_len=smooth_window_len,
                     zero_thresh=0.05)
    b.build_seg(plotSwitch=False, minSize=min_size)
    b.seg_encode()
    b.plot()
    print("The keyword is \"" + b.get_word() + "\", contains " + str(len(b.get_word())) + " symbols")
    print("-----------------------------------")

    print("Searching the best " + str(n) + " matching time series pieces...")

    # Levenshtein distance matching; matched words may differ in length from
    # the keyword, so each match carries its own word length.
    match_pos_list, matching_words, match_dist_list, _ = leven_match(
        b.get_word(), a.get_word(), n)

    def resolve(rank, segments):
        """Return (pos, dist, start, end, word length) for match number rank."""
        word_len = len(matching_words[rank])
        first_seg = match_pos_list[rank]
        distance = match_dist_list[rank]
        begin = segments[first_seg][0]
        finish = segments[first_seg + word_len - 1][1]
        return first_seg, distance, begin, finish, word_len

    # Textual report of the matches.
    rank = 0
    while rank < n:
        pos, dist, begin, finish, word_len = resolve(rank, a.get_seg_ts())
        print("".join([
            "Matching ", str(rank + 1), ":\t",
            a.get_word()[pos:pos + word_len],
            "[", str(begin), ":", str(finish), "] dist=", str(dist),
        ]))
        rank += 1

    # Plot matching results: keyword on top, one row per match below.
    fig = plt.figure()
    top_axes = plt.subplot(n + 1, 1, 1)
    top_axes.plot(b.get_smooth_ts(), linewidth=1, color='r')
    top_axes.set_title("keyword time series: " + b.get_word())

    smooth_db = a.get_smooth_ts()
    db_segments = a.get_seg_ts()
    for rank in range(n):
        pos, dist, begin, finish, word_len = resolve(rank, db_segments)
        match_axes = plt.subplot(n + 1, 1, rank + 2)
        match_axes.plot(range(begin, finish), smooth_db[begin:finish],
                        linewidth=1, color='b')
        match_axes.set_title("Matching " + str(rank) + ": " + a.get_word()[pos:pos + word_len] + ", dist=" + str(dist))

    fig.subplots_adjust(hspace=1)
    plt.show(block=True)