def get_dtw_wrapping_path(
        data, col1, col2,
        output_dir="D:/Pythoncode/JD_mart/operation_flow_distribution/DTW_for_business/results"):
    """Plot the DTW warping between two z-scored columns and print their distance.

    Every column of ``data`` except ``'date'`` is z-scored with
    ``stats.zscore``; the two requested columns are then aligned with DTW.
    The warping plot (``col1`` on top, ``col2`` below) is written to
    ``<output_dir>/<col1>vs<col2>.png`` and the DTW distance is printed.

    :param data: source DataFrame
    :param col1: name of the first column
    :param col2: name of the second column
    :param output_dir: directory receiving the PNG; defaults to the
        location that used to be hard-coded
    :raises ValueError: if ``col1`` or ``col2`` is not a non-date column
    """
    # Exact comparison on purpose: ``name not in 'date'`` is a substring test
    # and would also drop columns such as 'a', 't' or 'ate'.
    indicators = [name for name in data.columns if name != 'date']
    zscored = stats.zscore(data[indicators].values)

    x = zscored[:, indicators.index(col1)]
    y = zscored[:, indicators.index(col2)]

    path = dtw.warping_path(x, y)
    ds_xy = dtw.distance(x, y)

    outname = col1 + 'vs' + col2
    dtwvis.plot_warping(x, y, path,
                        filename="%s/%s.png" % (output_dir, outname))
    print("%s 和 %s 的DTW距离: %2.4f" % (col1, col2, ds_xy))
def test_psi_dtw_1a():
    """Check that psi relaxation gives zero distance for two shifted sines.

    The second series is the first one shifted by one unit on the x axis.
    With ``psi=2`` the computed distance is asserted to be exactly ``0.0``.
    When visualisation is enabled and the module-level ``directory`` is set,
    the warping-paths plot is saved as ``test_psi_dtw_1a.png``.
    """
    with util_numpy.test_uses_numpy() as np:
        samples = np.arange(0, 20, .5)
        s1 = np.sin(samples)
        s2 = np.sin(samples - 1)

        d, paths = dtw.warping_paths(s1, s2, psi=2, window=25)
        path = dtw.warping_path(s1, s2, psi=2)

        if not dtwvis.test_without_visualization() and directory:
            target = str(directory / "test_psi_dtw_1a.png")
            dtwvis.plot_warpingpaths(s1, s2, paths, path, filename=target)

        np.testing.assert_equal(d, 0.0)
def _calculate_dtw_over_time_window(window, minimum_close, maximum_close, pattern,
                                    start_window, end_window, threshold):
    """Match one window of close prices against a pattern using DTW.

    The window is normalised with ``normalize_function_paper`` into the
    pattern's value range and its DTW distance to the pattern is computed.

    :param window: iterable of close prices for this window
    :param minimum_close: minimum close value passed to the normaliser
    :param maximum_close: maximum close value passed to the normaliser
    :param pattern: object providing ``get_min_max()`` and ``get_pattern()``
    :param start_window: index of the window start in ``list_of_dates``
    :param end_window: index of the window end in ``list_of_dates``
    :param threshold: a match is reported only if the distance is strictly
        below this value
    :return: a list holding one result dict when the window matches,
        otherwise an empty list
    """
    minimum_value_pattern, maximum_value_pattern = pattern.get_min_max()
    normalized_data = [
        normalize_function_paper(close_price, minimum_close, maximum_close,
                                 minimum_value_pattern, maximum_value_pattern)
        for close_price in window
    ]

    # Fetch the pattern once instead of once per use.
    pattern_values = pattern.get_pattern()
    distance = dtw.distance(normalized_data, pattern_values)

    if distance < threshold:
        # The warping path is only needed for reported matches, so it is not
        # computed for windows the threshold rejects.
        path = dtw.warping_path(normalized_data, pattern_values)
        return [{
            "pattern_name": pattern.__class__.__name__,
            "start_window": start_window,
            "start_date": list_of_dates[start_window],
            "end_window": end_window,
            "end_date": list_of_dates[end_window],
            "distance": distance,
            "path": path,
            "normalized_data": normalized_data,
            "pattern": pattern_values,
        }]
    return []
def filter_cuts(shifted_observed_runs: List[int],
                expected_runs: List[int]) -> Tuple[List[int], List[int], float]:
    """
    Applies dynamic time warping to select cuts in the observed pixel sequence
    of whitespace runs according to the expected runs

    :param shifted_observed_runs: histogram of whitespace runs encoded as
        follows: position = start of run, value = length of run
    :param expected_runs: histogram of expected word cuts
        (see: `expected_runs_for_line`)
    :return: A triplet `(cuts, cuts_indices, distance)` where:
        * cuts: list of x-coordinates of word cuts
        * cuts_indices: indices of selected cuts
        * distance: DTW distance between the two input sequences
    """
    path = dtw.warping_path(expected_runs, shifted_observed_runs)
    distance = dtw.distance(expected_runs, shifted_observed_runs)

    # Position 0 is always a candidate, followed by every non-empty run start.
    runs_indices = [0]
    runs_indices.extend(i for i, x in enumerate(shifted_observed_runs) if x > 0)

    # Map run start -> its first position in `runs_indices`. This replaces a
    # list membership test plus `list.index` scan per path step; `setdefault`
    # keeps the first occurrence, matching `list.index` when 0 appears twice.
    position_of = {}
    for position, run_start in enumerate(runs_indices):
        position_of.setdefault(run_start, position)

    cuts = []
    cuts_indices = []
    for i, j in path:
        if expected_runs[i] > 0:
            cuts.append(j)
            position = position_of.get(j)
            if position is None:
                print(
                    f"DTW associated expected peak in {i} to zero value in {j}",
                    file=sys.stderr)
            else:
                cuts_indices.append(position)
    return cuts, cuts_indices, distance
def print_dtw(series_1, series_2, output_path):
    """Plot the DTW warping of two time series chunk by chunk.

    Each series is cut to ``roundup(len(series))`` samples and divided with
    ``np.split`` into ``int(length / 100)`` equal sections. Sections at the
    same position are aligned with DTW; the path is printed and the warping
    plot is saved.

    :param series_1: First time series to compare
    :param series_2: Second time series to compare
    :param output_path: ``%``-format template for the picture file name; it
        is formatted with the running chunk number (0, 1, 2, ...)
    """
    len1 = roundup(len(series_1))
    len2 = roundup(len(series_2))

    chunks_1 = np.split(series_1[:len1], int(len1 / 100))
    chunks_2 = np.split(series_2[:len2], int(len2 / 100))

    for chunk_no, chunk_1 in enumerate(chunks_1):
        chunk_2 = chunks_2[chunk_no]
        path = dtw.warping_path(chunk_1, chunk_2)
        print(path)
        dtwvis.plot_warping(chunk_1, chunk_2, path,
                            filename=output_path % chunk_no)
def test_twoleadecg_1(directory=None):
    """Run DTW with psi relaxation on two hard-coded series and plot the result.

    :param directory: optional path-like output directory; when given,
        ``warping.png`` and ``warpingpaths.png`` are written there. Nothing
        is plotted when it is falsy.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([1.8896,-0.23712,-0.23712,-0.20134,-0.16556,-0.20134,-0.16556,-0.12978,-0.058224,0.013335,0.031225,0.10278,0.013335,-0.094004,-0.058224,-0.11189,-0.14767,-0.16556,-0.14767,-0.094004,-0.14767,-0.16556,-0.16556,-0.21923,-0.21923,-0.25501,-0.20134,-0.20134,-0.18345,-0.23712,-0.20134,-0.23712,-0.12978,-0.11189,-0.46969,-1.2747,-2.3481,-2.8133,-2.7775,-2.5986,-2.3839,-2.0082,-1.8651,-1.6146,-1.3463,-1.1495,-0.88115,-0.55914,-0.34446,-0.16556,-0.0045548,0.2459,0.53214,0.65737,0.71104,0.74682,0.76471,0.76471,0.80049,0.81838,0.87204,0.88993,0.97938,0.97938,1.0152,1.0867,1.1583,1.1762,1.212,1.2656,1.2656,1.2477,1.2656,1.1762,1.0867,0.99727,0.88993,0.74682,0.63948,0.58581,0.47847,0.38902])
        s2 = np.array([1,0.93163,0.094486,0.094486,0.038006,0.080366,0.080366,0.052126,0.080366,0.12273,0.22157,0.29217,0.41925,0.48985,0.39101,0.39101,0.30629,0.24981,0.19333,0.080366,-0.0043544,-0.018474,-0.089075,-0.11731,-0.14555,-0.17379,-0.21615,-0.27263,-0.20203,-0.315,-0.25851,-0.17379,-0.28675,-0.24439,0.16509,-0.11731,-1.0069,-1.9812,-2.4895,-2.786,-2.9272,-2.4612,-2.0518,-1.8964,-1.8258,-1.7411,-1.6705,-1.2893,-0.99276,-0.65388,-0.37148,-0.30087,-0.046714,0.30629,0.53221,0.65929,0.65929,0.72989,0.74401,0.87109,0.89933,0.95581,0.96993,1.0546,1.1394,1.2523,1.2523,1.2947,1.3088,1.3512,1.2806,1.2806,1.1394,1.097,0.89933,0.72989,0.67341,0.54633,0.37689,0.23569,0.10861,0.080366,-0.074955])

        d, paths = dtw.warping_paths(s1, s2, psi=2, window=5)
        path = dtw.warping_path(s1, s2, psi=2)

        # Plots are only produced when an output directory was supplied.
        if not directory:
            return

        dtwvis.plot_warping(s1, s2, path,
                            filename=str(directory / "warping.png"))
        best = dtw.best_path(paths)
        dtwvis.plot_warpingpaths(s1, s2, paths, best,
                                 filename=str(directory / "warpingpaths.png"))
def dtw_visual(x, y, filename="tmp.png"):
    """
    Plot to show how dtw works

    Computes the DTW warping path between ``x`` and ``y`` and saves the
    warping plot to ``filename``.

    :param x: time series, e.g. np.array([0., 0, 1, 2, 1, 0, 2, 1, 0, 0])
    :param y: time series, same kind of input as ``x``
    :param filename: where the plot is written; defaults to ``"tmp.png"``,
        the previously hard-coded location
    :return: None
    """
    from dtaidistance import dtw
    from dtaidistance import dtw_visualisation as dtwvis

    path = dtw.warping_path(x, y)
    dtwvis.plot_warping(x, y, path, filename=filename)
def test_warping_path1():
    """Check `warping_path` and `warping_path_fast` against a known path.

    Both functions must return a path with the expected length whose
    ``(row, column)`` pairs equal the reference pairs element by element.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 0, 0])
        s2 = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0, 0, 0])

        path_plain = dtw.warping_path(s1, s2)
        path_fast = dtw.warping_path_fast(s1, s2)
        expected = [(0, 0), (1, 0), (2, 1), (3, 2), (4, 3), (5, 4),
                    (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10)]

        assert len(path_plain) == len(expected)
        assert len(path_fast) == len(expected)
        for computed in (path_plain, path_fast):
            assert all(
                got_r == want_r and got_c == want_c
                for ((got_r, got_c), (want_r, want_c)) in zip(computed, expected)
            )
def dta_dtw(signalA, signalB, **dtw_kwargs):
    '''
    Bundle the DTW distance and warping path from the dtaidistance package.
    This is the underlying process to be applied in the HDTW process.

    :param signalA: The first signal to apply DTW on
    :param signalB: The second signal to apply DTW on
    :param **dtw_kwargs: any keyword arguments; they are propagated
        unchanged to both `dtw.distance_fast` and `dtw.warping_path`.
    :return: tuple `(distance, path)`
    '''
    distance = dtw.distance_fast(signalA, signalB, **dtw_kwargs)
    path = dtw.warping_path(signalA, signalB, **dtw_kwargs)
    return distance, path
def test_normalize():
    """Check that `dtw.warp` maps s1 onto s2 with the expected result.

    The warped series is compared to a reference to four decimals. When the
    module-level ``directory`` is set, the warp plot and a psi-relaxed
    warping plot are also saved.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0])
        s2 = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0])
        expected = np.array([0., 1., 2., 2., 1., 0.5, 0., 0., 2., 1., 0., 0., 0.])

        warped, warp_path = dtw.warp(s1, s2)

        if directory:
            dtwvis.plot_warp(s1, s2, warped, warp_path,
                             filename=str(directory / "test_normalize1.png"))
            psi_path = dtw.warping_path(s1, s2, psi=2)
            dtwvis.plot_warping(s1, s2, psi_path,
                                filename=str(directory / "test_normalize2.png"))

        np.testing.assert_almost_equal(warped, expected, decimal=4)
def test_bug4():
    """Regression test: plotting the warping of two short series must not fail.

    The plot goes to ``bug4.png`` in the module-level ``directory`` when that
    is set. Otherwise it is written into a temporary directory that is
    removed again once the plot has been produced.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0])
        s2 = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0])
        path = dtw.warping_path(s1, s2)

        if directory:
            fn = directory / "bug4.png"
            dtwvis.plot_warping(s1, s2, path, filename=str(fn))
        else:
            # A context-managed temporary directory replaces the unclosed
            # NamedTemporaryFile, whose handle was never released and whose
            # sibling "*_bug4.png" file was never deleted.
            with tempfile.TemporaryDirectory() as tmp_dir:
                fn = Path(tmp_dir) / "bug4.png"
                dtwvis.plot_warping(s1, s2, path, filename=str(fn))
def test_twoleadecg_1():
    """Example from http://www.timeseriesclassification.com/description.php?Dataset=TwoLeadECG

    Runs psi-relaxed DTW on the two series. When visualisation is enabled
    and the module-level ``directory`` is set, it saves a 12x10 inch,
    100 dpi warping figure (``warping.png``) and the warping-paths plot
    (``warpingpaths.png``).
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([1.8896,-0.23712,-0.23712,-0.20134,-0.16556,-0.20134,-0.16556,-0.12978,-0.058224,0.013335,0.031225,0.10278,0.013335,-0.094004,-0.058224,-0.11189,-0.14767,-0.16556,-0.14767,-0.094004,-0.14767,-0.16556,-0.16556,-0.21923,-0.21923,-0.25501,-0.20134,-0.20134,-0.18345,-0.23712,-0.20134,-0.23712,-0.12978,-0.11189,-0.46969,-1.2747,-2.3481,-2.8133,-2.7775,-2.5986,-2.3839,-2.0082,-1.8651,-1.6146,-1.3463,-1.1495,-0.88115,-0.55914,-0.34446,-0.16556,-0.0045548,0.2459,0.53214,0.65737,0.71104,0.74682,0.76471,0.76471,0.80049,0.81838,0.87204,0.88993,0.97938,0.97938,1.0152,1.0867,1.1583,1.1762,1.212,1.2656,1.2656,1.2477,1.2656,1.1762,1.0867,0.99727,0.88993,0.74682,0.63948,0.58581,0.47847,0.38902])
        s2 = np.array([1,0.93163,0.094486,0.094486,0.038006,0.080366,0.080366,0.052126,0.080366,0.12273,0.22157,0.29217,0.41925,0.48985,0.39101,0.39101,0.30629,0.24981,0.19333,0.080366,-0.0043544,-0.018474,-0.089075,-0.11731,-0.14555,-0.17379,-0.21615,-0.27263,-0.20203,-0.315,-0.25851,-0.17379,-0.28675,-0.24439,0.16509,-0.11731,-1.0069,-1.9812,-2.4895,-2.786,-2.9272,-2.4612,-2.0518,-1.8964,-1.8258,-1.7411,-1.6705,-1.2893,-0.99276,-0.65388,-0.37148,-0.30087,-0.046714,0.30629,0.53221,0.65929,0.65929,0.72989,0.74401,0.87109,0.89933,0.95581,0.96993,1.0546,1.1394,1.2523,1.2523,1.2947,1.3088,1.3512,1.2806,1.2806,1.1394,1.097,0.89933,0.72989,0.67341,0.54633,0.37689,0.23569,0.10861,0.080366,-0.074955])

        d, paths = dtw.warping_paths(s1, s2, psi=2, window=5)
        path = dtw.warping_path(s1, s2, psi=2)

        if not dtwvis.test_without_visualization() and directory:
            import matplotlib.pyplot as plt

            # plot_warping returns the figure together with its axes.
            figure, _axes = dtwvis.plot_warping(s1, s2, path)
            figure.set_size_inches(12, 10)
            figure.set_dpi(100)
            figure.savefig(str(directory / "warping.png"))
            plt.close(figure)

            best = dtw.best_path(paths)
            dtwvis.plot_warpingpaths(
                s1, s2, paths, best,
                filename=str(directory / "warpingpaths.png"))
def mean_of_cluster(series, Z, parent_idx, dp):
    """Recursively compute the weighted DTW mean series of a cluster.

    Indices below the module-level ``num_series`` refer to an original
    series and carry weight 1; larger indices refer to a merge row of ``Z``
    (after subtracting ``num_series``) and are resolved recursively. The two
    children are aligned with a DTW warping path, averaged point by point
    using their weights, and the result is resampled to
    ``num_pts_per_series`` points by linear interpolation.

    :param series: indexable collection of the original series
    :param Z: merge table; columns 0 and 1 of a row hold the two child
        indices (presumably a scipy linkage matrix - confirm with the caller)
    :param parent_idx: cluster index, with or without the ``num_series`` offset
    :param dp: memo dict mapping a row index to ``[mean, labels, weight]``;
        it is updated in place
    :return: tuple ``(mean_series, (child1_label, child2_label), weight)``
    """
    if parent_idx >= num_series:
        parent_idx -= num_series

    # Reuse the memoised result when this merge row was already processed.
    if parent_idx in dp:
        cached = dp[parent_idx]
        return cached[0], cached[1], cached[2]

    def resolve(child_idx):
        # An index below num_series is an original series with weight 1; any
        # other index is another merge row whose mean is computed recursively.
        if child_idx < num_series:
            return series[child_idx], child_idx, 1
        return mean_of_cluster(series, Z, child_idx, dp)

    first_idx = int(Z[parent_idx][0])
    second_idx = int(Z[parent_idx][1])
    first, first_label, first_weight = resolve(first_idx)
    second, second_label, second_weight = resolve(second_idx)

    # Warping path from the first child's series to the second child's series.
    path = dtw.warping_path(first, second)

    # One averaged value per path connection, so this is longer than either
    # child series. Every original series inside a child contributes one unit
    # of weight to that child.
    total_weight = first_weight + second_weight
    mean_long = np.asarray(
        [(first[i] * first_weight + second[j] * second_weight) / total_weight
         for i, j in path],
        dtype=np.double)

    # Interpolate along the long mean and sample it at num_pts_per_series
    # evenly spaced positions to bring it back to the common length.
    mean_interp = interp.interp1d(np.arange(mean_long.size), mean_long)
    mean_compress = mean_interp(
        np.linspace(0, mean_long.size - 1, num_pts_per_series))

    labels = (first_label, second_label)
    dp[parent_idx] = [mean_compress, labels, total_weight]
    return mean_compress, labels, total_weight
def dtw_(self, length_min, length_max):
    """Plot the DTW alignment of a slice of the two stored series.

    Slices ``self.new_real_normal`` and ``self.ncsimul_y_normal`` to
    ``[length_min:length_max]``, then writes the warping plot to
    ``warp<self.test>.png`` and the warping-paths plot with its best path to
    ``best_path<self.test>.png``.

    :param length_min: start index of the slice
    :param length_max: end index of the slice (exclusive)
    """
    first = self.new_real_normal[length_min:length_max]
    second = self.ncsimul_y_normal[length_min:length_max]

    path = dtw.warping_path(first, second)
    _, paths = dtw.warping_paths(first, second)

    warp_file = "warp" + str(self.test) + ".png"
    dtwvis.plot_warping(first, second, path, filename=warp_file)

    best_path = dtw.best_path(paths)
    best_file = "best_path" + str(self.test) + ".png"
    dtwvis.plot_warpingpaths(first, second, paths, best_path,
                             filename=best_file)
def main():
    """Demonstrate DTW on two small series.

    In order: plots the warping between the series, shows both series in a
    two-row figure, prints their DTW distance, and plots the warping paths
    (window=3, psi=2) with the best path.
    """
    s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0])
    s2 = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0])

    # Warping plot of the optimal alignment.
    warping = dtw.warping_path(s1, s2)
    dtwvis.plot_warping(s1, s2, warping)

    # Raw series, s1 in the upper and s2 in the lower panel.
    plt.figure(1)
    plt.subplot(211)
    plt.title('Timeseries: s1 & s2')
    plt.plot(s1)
    plt.subplot(212)
    plt.plot(s2)
    plt.show()

    print(dtw.distance(s1, s2))

    # Warping-paths plot with the best path.
    plt.figure(2)
    _, all_paths = dtw.warping_paths(s1, s2, window=3, psi=2)
    dtwvis.plot_warpingpaths(s1, s2, all_paths, dtw.best_path(all_paths))
def prep_dtw(y, y_, min, max, file_):
    """Align ``y[min:max]`` with ``y_[min:max]`` using DTW and save two plots.

    The warping plot is written to ``<file_>warp_results.png`` and the
    warping-paths plot with its best path to ``<file_>best_path_results.png``.

    :param y: first series
    :param y_: second series
    :param min: start index of the compared slice
    :param max: end index of the compared slice (exclusive); both inputs must
        be at least this long
    :param file_: prefix for the two output file names
    :return: tuple ``(path, distance)``
    :raises NameError: if either input is shorter than ``max`` or its length
        cannot be compared with ``max`` (exception type kept for existing
        callers)
    """
    message = 'the maximum lengh not respects lenght of inputs'

    # The comparison result used to be discarded inside a bare try/except, so
    # inputs that were too short were silently truncated by the slices.
    try:
        long_enough = len(y) >= max and len(y_) >= max
    except TypeError as err:
        raise NameError(message) from err
    if not long_enough:
        raise NameError(message)

    y_window = y[min:max]
    y_other_window = y_[min:max]

    path = dtw.warping_path(y_window, y_other_window)
    distance, paths = dtw.warping_paths(y_window, y_other_window)
    dtwvis.plot_warping(y_window, y_other_window, path,
                        filename=file_ + "warp_results.png")

    best_path = dtw.best_path(paths)
    dtwvis.plot_warpingpaths(y_window, y_other_window, paths, best_path,
                             filename=file_ + "best_path_results.png")
    return path, distance
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis
import pandas as pd

# Compute the dynamic time warping distance between the log-return time
# series of two stocks.
path1 = "000001.XSHE.csv"
path2 = "000063.XSHE.csv"
feature = "rclose"
length = 20
window = 10
psi = 5

# First `length` rows of the chosen column of each file.
rc1 = pd.read_csv(path1)[feature][:length]
rc2 = pd.read_csv(path2)[feature][:length]

dis, paths = dtw.warping_paths(rc1, rc2, window=window, psi=psi)

# Dynamic time warping distance
print(dis)

# Plot (output form): no filename is passed here
best_path = dtw.best_path(paths)
dtwvis.plot_warpingpaths(rc1, rc2, paths, best_path, shownumbers=True)

# Plot (saved to file)
path = dtw.warping_path(rc1, rc2)
dtwvis.plot_warping(rc1, rc2, path, filename="wrapping.png")
# Excerpt: for every entry `n` of `nodes` below 3687, take series[int(n)] as `y`,
# compute its DTW warping path against the reference `x = series[comparison]`,
# append the samples of `y` reordered along that path to `xallm`, and append the
# matching path positions (0 .. len(path) - 1) to `tallm`.
# NOTE(review): the statements are collapsed onto one line, so the nesting of
# `if len(y):` and `if i == 1:` cannot be recovered here, and `i`, `series`,
# `comparison` and `nodes` are defined outside this excerpt - confirm against
# the original file before reformatting.
x = series[comparison] xallm = [] tallm = [] for n in nodes: if n < 3687: y = series[int(n)] map_x, map_y = list(zip(*dtw.warping_path(x, y))) map_x = np.asarray(map_x) map_y = np.asarray(map_y) if len(y): maxl = len(y) xallm += list(y[map_y]) tallm += range(len(map_y)) if i == 1: maxl = len(x)
except IndexError: #get median offset to apply to match spacecraft off_speed = p_mat.SPEED.median() - t_mat.SPEED.median() p_mat.SPEED = p_mat.SPEED - off_speed #get dynamic time warping value print('WARPING TIME') print(par) #dist, cost, path = mlpy.dtw_std(t_mat[par[0]].ffill().bfill().values,p_mat[par[0]].ffill().bfill().values,dist_only=False) #changed to dtwp that allows penalty for compression (i.e. prevent long stretches of the same value 2018/04/12 J. Prchlik #penalty = np.abs(p_mat[par[0]].median()-t_mat[par[0]].median()) if 'SPEED' in par: penalty = 15.0 elif any('B' in s for s in par): penalty = .2 print('Penalty = {0:4.3f}'.format(penalty)) path = dtw.warping_path(t_mat[par[0]].ffill().bfill().values, p_mat[par[0]].ffill().bfill().values, penalty=penalty) #put in previous path path = np.array(path).T print('STOP WARPING TIME') #get full offsets for dynamic time warping off_sol = (p_mat.iloc[path[1], :].index - t_mat.iloc[path[0], :].index) print('REINDEXED') #get a region around one of the best fit times b_mat = p_mat.copy() #update the time index of the match array for comparision with training spacecraft (i=training spacecraft time) b_mat = b_mat.reindex(b_mat.iloc[path[1], :].index).interpolate('time') b_mat.index = b_mat.index - off_sol
# Dynamic time warping between Alice's and Bob's coarse time-binned stamps.
import numpy as np
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis

import stampProcessor

mode = 'propagationDelayCorrection'
coarseTau = 10000
shift = -51318536.0

# Load both parties' time stamps and bin them with the same coarse bin width.
timeStampAlice = np.load('../data/aliceBobtimeStampAlice.npy')
timeStampBob = np.load('../data/aliceBobtimeStampBob.npy')
coarseTimebinAlice = stampProcessor.timebin(coarseTau, timeStampAlice)
coarseTimebinBob = stampProcessor.timebin(coarseTau, timeStampBob)

# Compare the same bin window of Alice (s1) and Bob (s2). s2 used to be
# sliced from Alice's bins as well, so the series was aligned with itself
# and Bob's data was never used.
# NOTE(review): `shift` and `mode` are not used below - confirm whether
# Bob's window should be offset by the propagation delay before comparing.
s1 = coarseTimebinAlice[500000:505000]
s2 = coarseTimebinBob[500000:505000]
print('len(x)', len(s1))
print('len(y)', len(s2))

# Warping plot of the optimal alignment.
path = dtw.warping_path(s1, s2)
dtwvis.plot_warping(s1, s2, path, filename="warp.png")

# Warping-paths plot with the best path, using a window and psi relaxation.
d, paths = dtw.warping_paths(s1, s2, window=25, psi=2)
best_path = dtw.best_path(paths)
dtwvis.plot_warpingpaths(s1, s2, paths, best_path, filename="path.png")