Exemplo n.º 1
0
def get_dtw_wrapping_path(data, col1, col2):
    '''
    Plot the DTW warping between two columns of ``data`` and print their
    DTW distance.

    input:
        data: DataFrame holding the source data
        col1: column name
        col2: column name
    output: an image with col1 drawn on top and col2 below, written to
        "<col1>vs<col2>.png" in the hard-coded results folder; nothing is
        returned.
    '''
    # Every column except the one named exactly 'date' is an indicator.
    # This must be an equality test: `i not in 'date'` is a substring check
    # and would also discard columns such as 'd', 'at' or 'te'.
    indicators = [i for i in data.columns if i != 'date']
    array_subset = data[indicators].values
    # Standardise the values with stats.zscore, then transpose so that each
    # row holds one indicator's series.
    array_subset_zscore_T = stats.zscore(array_subset).T
    x = array_subset_zscore_T[indicators.index(col1), :]
    y = array_subset_zscore_T[indicators.index(col2), :]
    path = dtw.warping_path(x, y)
    outname = col1 + 'vs' + col2
    ds_xy = dtw.distance(x, y)
    dtwvis.plot_warping(
        x,
        y,
        path,
        filename=
        "D:/Pythoncode/JD_mart/operation_flow_distribution/DTW_for_business/results/%s.png"
        % outname)
    print("%s 和 %s 的DTW距离: %2.4f" % (col1, col2, ds_xy))
Exemplo n.º 2
0
def test_psi_dtw_1a():
    """Check that two sine waves shifted by 1 have DTW distance 0.0 with psi=2.

    When visualisation is enabled and the module-level ``directory`` is set,
    the warping-paths matrix is also written to "test_psi_dtw_1a.png".
    """
    with util_numpy.test_uses_numpy() as np:
        samples = np.arange(0, 20, .5)
        s1 = np.sin(samples)
        s2 = np.sin(samples - 1)
        d, paths = dtw.warping_paths(s1, s2, psi=2, window=25)
        path = dtw.warping_path(s1, s2, psi=2)
        # Plot only when visualisation is allowed AND an output directory exists.
        if not dtwvis.test_without_visualization() and directory:
            target = str(directory / "test_psi_dtw_1a.png")
            dtwvis.plot_warpingpaths(s1, s2, paths, path, filename=target)
        np.testing.assert_equal(d, 0.0)
Exemplo n.º 3
0
def _calculate_dtw_over_time_window(window, minimum_close, maximum_close,
                                    pattern, start_window, end_window,
                                    threshold):
    """Compare one window of close prices against ``pattern`` using DTW.

    Each price in ``window`` is rescaled with ``normalize_function_paper``
    using the close-price bounds and the pattern's own min/max, then the DTW
    distance and warping path to ``pattern.get_pattern()`` are computed.

    :return: a list holding one result dict when the distance is below
        ``threshold``, otherwise an empty list.
    """
    pattern_low, pattern_high = pattern.get_min_max()

    normalized_data = []
    for close_price in window:
        normalized_data.append(
            normalize_function_paper(close_price, minimum_close,
                                     maximum_close, pattern_low,
                                     pattern_high))

    distance = dtw.distance(normalized_data, pattern.get_pattern())
    path = dtw.warping_path(normalized_data, pattern.get_pattern())

    # Guard clause: anything that is not strictly below the threshold
    # produces no match.
    if not distance < threshold:
        return []

    match = {
        "pattern_name": pattern.__class__.__name__,
        "start_window": start_window,
        "start_date": list_of_dates[start_window],
        "end_window": end_window,
        "end_date": list_of_dates[end_window],
        "distance": distance,
        "path": path,
        "normalized_data": normalized_data,
        "pattern": pattern.get_pattern()
    }
    return [match]
Exemplo n.º 4
0
def filter_cuts(shifted_observed_runs: List[int],
                expected_runs: List[int]) -> Tuple[List[int], List[int], int]:
    """
    Applies dynamic time warping to select cuts in the observed pixel sequence of whitespace runs according to the expected runs
    :param shifted_observed_runs: histogram of whitespace runs encoded as follows: position = start of run, value = length of run
    :param expected_runs: histogram of expected word cuts (see: `expected_runs_for_line`)
    :return: A triplet `(cuts, cuts_indices, distance)` where:
    * cuts: list of x-coordinates of word cuts
    * cuts_indices: indices of selected cuts
    * distance: DTW distance between the two input sequences
    """
    path = dtw.warping_path(expected_runs, shifted_observed_runs)
    distance = dtw.distance(expected_runs, shifted_observed_runs)

    # Positions of the non-empty observed runs, with position 0 always
    # prepended as the first entry.
    runs_indices = [i for i, x in enumerate(shifted_observed_runs) if x > 0]
    runs_indices.insert(0, 0)

    # Position -> rank of its FIRST occurrence in runs_indices. This gives
    # the same answer as `runs_indices.index(j)` (position 0 can appear
    # twice) but in O(1) per lookup instead of a linear scan per path step.
    rank_of_position = {}
    for rank, position in enumerate(runs_indices):
        rank_of_position.setdefault(position, rank)

    cuts = []
    cuts_indices = []
    for i, j in path:
        # Only path steps aligned with an expected cut are of interest.
        if not expected_runs[i] > 0:
            continue

        cuts.append(j)
        rank = rank_of_position.get(j)
        if rank is None:
            print(
                f"DTW associated expected peak in {i} to zero value in {j}",
                file=sys.stderr)
        else:
            cuts_indices.append(rank)

    return cuts, cuts_indices, distance
Exemplo n.º 5
0
def print_dtw(series_1, series_2, output_path):
    """
    Draw the DTW warping of two time series, one picture per section.

    Each series is truncated to ``roundup(len(series))`` elements and split
    with ``np.split`` into ``int(length / 100)`` equal sections; matching
    sections are warped against each other, the path is printed and a plot
    is saved.

    :param series_1: First time series to compare
    :param series_2: Second time series to compare
    :param output_path: Filename template; the section number is substituted
        with the ``%`` operator
    """
    limit_1 = roundup(len(series_1))
    limit_2 = roundup(len(series_2))

    sections_1 = np.split(series_1[:limit_1], int(limit_1 / 100))
    sections_2 = np.split(series_2[:limit_2], int(limit_2 / 100))

    # The section number doubles as the picture counter.
    for section_no, section_1 in enumerate(sections_1):
        section_2 = sections_2[section_no]
        path = dtw.warping_path(section_1, section_2)
        print(path)
        dtwvis.plot_warping(section_1,
                            section_2,
                            path,
                            filename=output_path % section_no)
0
def test_twoleadecg_1(directory=None):
    """Compute DTW (psi=2) on two fixed sample series and optionally plot it.

    :param directory: path-like output folder; when falsy nothing is plotted.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([1.8896,-0.23712,-0.23712,-0.20134,-0.16556,-0.20134,-0.16556,-0.12978,-0.058224,0.013335,0.031225,0.10278,0.013335,-0.094004,-0.058224,-0.11189,-0.14767,-0.16556,-0.14767,-0.094004,-0.14767,-0.16556,-0.16556,-0.21923,-0.21923,-0.25501,-0.20134,-0.20134,-0.18345,-0.23712,-0.20134,-0.23712,-0.12978,-0.11189,-0.46969,-1.2747,-2.3481,-2.8133,-2.7775,-2.5986,-2.3839,-2.0082,-1.8651,-1.6146,-1.3463,-1.1495,-0.88115,-0.55914,-0.34446,-0.16556,-0.0045548,0.2459,0.53214,0.65737,0.71104,0.74682,0.76471,0.76471,0.80049,0.81838,0.87204,0.88993,0.97938,0.97938,1.0152,1.0867,1.1583,1.1762,1.212,1.2656,1.2656,1.2477,1.2656,1.1762,1.0867,0.99727,0.88993,0.74682,0.63948,0.58581,0.47847,0.38902])
        s2 = np.array([1,0.93163,0.094486,0.094486,0.038006,0.080366,0.080366,0.052126,0.080366,0.12273,0.22157,0.29217,0.41925,0.48985,0.39101,0.39101,0.30629,0.24981,0.19333,0.080366,-0.0043544,-0.018474,-0.089075,-0.11731,-0.14555,-0.17379,-0.21615,-0.27263,-0.20203,-0.315,-0.25851,-0.17379,-0.28675,-0.24439,0.16509,-0.11731,-1.0069,-1.9812,-2.4895,-2.786,-2.9272,-2.4612,-2.0518,-1.8964,-1.8258,-1.7411,-1.6705,-1.2893,-0.99276,-0.65388,-0.37148,-0.30087,-0.046714,0.30629,0.53221,0.65929,0.65929,0.72989,0.74401,0.87109,0.89933,0.95581,0.96993,1.0546,1.1394,1.2523,1.2523,1.2947,1.3088,1.3512,1.2806,1.2806,1.1394,1.097,0.89933,0.72989,0.67341,0.54633,0.37689,0.23569,0.10861,0.080366,-0.074955])
        _, paths = dtw.warping_paths(s1, s2, psi=2, window=5)
        path = dtw.warping_path(s1, s2, psi=2)
        if not directory:
            return
        # First picture: the direct warping path between both series.
        dtwvis.plot_warping(s1, s2, path, filename=str(directory / "warping.png"))
        # Second picture: the full cost matrix with its best path.
        best = dtw.best_path(paths)
        dtwvis.plot_warpingpaths(s1, s2, paths, best, filename=str(directory / "warpingpaths.png"))
Exemplo n.º 7
0
def dtw_visual(x, y):
    """
    Plot to show how dtw works; the picture is saved as "tmp.png".

    :param x: time series, e.g. np.array([0., 0, 1, 2, 1, 0, 2, 1, 0, 0])
    :param y: time series, same kind of input as ``x``
    :return: None
    """
    from dtaidistance import dtw, dtw_visualisation as dtwvis
    dtwvis.plot_warping(x, y, dtw.warping_path(x, y), filename="tmp.png")
Exemplo n.º 8
0
def test_warping_path1():
    """Both warping_path and warping_path_fast must reproduce the known path."""
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 0, 0])
        s2 = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0, 0, 0])
        computed = (dtw.warping_path(s1, s2), dtw.warping_path_fast(s1, s2))
        expected = [(0, 0), (1, 0), (2, 1), (3, 2), (4, 3), (5, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 10)]
        # Lengths are checked first because zip() would hide a length mismatch.
        for candidate in computed:
            assert len(candidate) == len(expected)
        for candidate in computed:
            assert all(got_i == want_i and got_j == want_j
                       for (got_i, got_j), (want_i, want_j) in zip(candidate, expected))
Exemplo n.º 9
0
def dta_dtw(signalA, signalB, **dtw_kwargs):
    '''
    Bundle the DTW distance and the warping path from the dtaidistance package.
    This is the underlying process to be applied in the HDTW process.

    :param signalA: The first signal to apply DTW on
    :param signalB: The second signal to apply DTW on
    :param **dtw_kwargs: any keyword arguments to be propagated unchanged to
        both ``dtw.distance_fast`` and ``dtw.warping_path``.
    :return: tuple ``(distance, path)``
    '''
    distance = dtw.distance_fast(signalA, signalB, **dtw_kwargs)
    path = dtw.warping_path(signalA, signalB, **dtw_kwargs)
    return distance, path
Exemplo n.º 10
0
def test_normalize():
    """Check that dtw.warp(s1, s2) matches the reference series to 4 decimals.

    When the module-level ``directory`` is set, two pictures are also saved:
    the warp result and the psi=2 warping path.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0])
        s2 = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0])
        expected = np.array([0., 1., 2., 2., 1., 0.5, 0., 0., 2., 1., 0., 0., 0.])
        r, path = dtw.warp(s1, s2)
        if directory:
            dtwvis.plot_warp(s1, s2, r, path, filename=str(directory / "test_normalize1.png"))
            psi_path = dtw.warping_path(s1, s2, psi=2)
            dtwvis.plot_warping(s1, s2, psi_path, filename=str(directory / "test_normalize2.png"))
        np.testing.assert_almost_equal(r, expected, decimal=4)
Exemplo n.º 11
0
def test_bug4():
    """Regression test: plotting the warping path of two series must not fail.

    The picture goes to ``directory / "bug4.png"`` when the module-level
    ``directory`` is set. Otherwise it is written into a temporary directory
    that is removed again afterwards, so no file handle or stray PNG is left
    behind.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0])
        s2 = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0])
        path = dtw.warping_path(s1, s2)

        if directory:
            fn = directory / "bug4.png"
            dtwvis.plot_warping(s1, s2, path, filename=str(fn))
        else:
            # The context manager cleans up the directory and the plot in it.
            with tempfile.TemporaryDirectory() as tmp_dir:
                fn = Path(tmp_dir) / "bug4.png"
                dtwvis.plot_warping(s1, s2, path, filename=str(fn))
Exemplo n.º 12
0
def test_twoleadecg_1():
    """Example from http://www.timeseriesclassification.com/description.php?Dataset=TwoLeadECG

    Computes the DTW paths (psi=2) for two fixed series. When visualisation
    is enabled and the module-level ``directory`` is set, it saves
    "warping.png" and "warpingpaths.png" there.
    """
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([1.8896,-0.23712,-0.23712,-0.20134,-0.16556,-0.20134,-0.16556,-0.12978,-0.058224,0.013335,0.031225,0.10278,0.013335,-0.094004,-0.058224,-0.11189,-0.14767,-0.16556,-0.14767,-0.094004,-0.14767,-0.16556,-0.16556,-0.21923,-0.21923,-0.25501,-0.20134,-0.20134,-0.18345,-0.23712,-0.20134,-0.23712,-0.12978,-0.11189,-0.46969,-1.2747,-2.3481,-2.8133,-2.7775,-2.5986,-2.3839,-2.0082,-1.8651,-1.6146,-1.3463,-1.1495,-0.88115,-0.55914,-0.34446,-0.16556,-0.0045548,0.2459,0.53214,0.65737,0.71104,0.74682,0.76471,0.76471,0.80049,0.81838,0.87204,0.88993,0.97938,0.97938,1.0152,1.0867,1.1583,1.1762,1.212,1.2656,1.2656,1.2477,1.2656,1.1762,1.0867,0.99727,0.88993,0.74682,0.63948,0.58581,0.47847,0.38902])
        s2 = np.array([1,0.93163,0.094486,0.094486,0.038006,0.080366,0.080366,0.052126,0.080366,0.12273,0.22157,0.29217,0.41925,0.48985,0.39101,0.39101,0.30629,0.24981,0.19333,0.080366,-0.0043544,-0.018474,-0.089075,-0.11731,-0.14555,-0.17379,-0.21615,-0.27263,-0.20203,-0.315,-0.25851,-0.17379,-0.28675,-0.24439,0.16509,-0.11731,-1.0069,-1.9812,-2.4895,-2.786,-2.9272,-2.4612,-2.0518,-1.8964,-1.8258,-1.7411,-1.6705,-1.2893,-0.99276,-0.65388,-0.37148,-0.30087,-0.046714,0.30629,0.53221,0.65929,0.65929,0.72989,0.74401,0.87109,0.89933,0.95581,0.96993,1.0546,1.1394,1.2523,1.2523,1.2947,1.3088,1.3512,1.2806,1.2806,1.1394,1.097,0.89933,0.72989,0.67341,0.54633,0.37689,0.23569,0.10861,0.080366,-0.074955])
        _, paths = dtw.warping_paths(s1, s2, psi=2, window=5)
        path = dtw.warping_path(s1, s2, psi=2)
        # Plot only when visualisation is allowed AND an output directory exists.
        if not dtwvis.test_without_visualization() and directory:
            import matplotlib.pyplot as plt
            # plot_warping returns the figure and its axes (plt.Figure, plt.axes.Axes)
            fig, axs = dtwvis.plot_warping(s1, s2, path)
            fig.set_size_inches(12, 10)
            fig.set_dpi(100)
            fig.savefig(str(directory / "warping.png"))
            plt.close(fig)
            best = dtw.best_path(paths)
            dtwvis.plot_warpingpaths(s1, s2, paths, best, filename=str(directory / "warpingpaths.png"))
Exemplo n.º 13
0
def mean_of_cluster(series, Z, parent_idx, dp):
    """Recursively build the weighted DTW mean series of one cluster.

    :param series: indexable collection of the original series
    :param Z: table whose row ``k`` holds the two child indices of cluster
        ``k`` in its first two entries (presumably a SciPy-style linkage
        matrix -- confirm with the caller). A child index below the global
        ``num_series`` names an original series, anything else names
        another row of ``Z``.
    :param parent_idx: cluster index; values >= ``num_series`` are shifted
        down by ``num_series`` to obtain the row in ``Z``
    :param dp: dict used as a memo, keyed by row index, filled in place
    :return: ``(mean_series, (label1, label2), weight)`` where the mean is
        resampled to the global ``num_pts_per_series`` points and ``weight``
        is the number of original series merged into it
    """
    if parent_idx >= num_series:
        parent_idx -= num_series
    # Memo hit: hand back the stored triple.
    if parent_idx in dp:
        cached = dp[parent_idx]
        return cached[0], cached[1], cached[2]

    def _resolve(child_idx):
        # An original series counts with weight 1; a cluster is resolved
        # recursively into its own mean, labels and accumulated weight.
        if child_idx < num_series:
            return series[child_idx], child_idx, 1
        return mean_of_cluster(series, Z, child_idx, dp)

    first_idx = int(Z[parent_idx][0])
    second_idx = int(Z[parent_idx][1])
    first, first_label, first_weight = _resolve(first_idx)
    second, second_label, second_weight = _resolve(second_idx)
    labels = (first_label, second_label)
    total_weight = first_weight + second_weight

    # One averaged value per step of the warping path, so this can be longer
    # than either child series.
    path = dtw.warping_path(first, second)
    mean_long = np.asarray(
        [(first[a] * first_weight + second[b] * second_weight) / total_weight
         for a, b in ((pair[0], pair[1]) for pair in path)],
        dtype=np.double)

    # Interpolate over the long mean and resample it at num_pts_per_series
    # evenly spaced positions.
    mean_interp = interp.interp1d(np.arange(mean_long.size), mean_long)
    mean_compress = mean_interp(
        np.linspace(0, mean_long.size - 1, num_pts_per_series))

    # Store the result for later lookups.
    dp[parent_idx] = [mean_compress, labels, total_weight]
    return mean_compress, labels, total_weight
Exemplo n.º 14
0
    def dtw_(self, length_min, length_max):
        path = dtw.warping_path(self.new_real_normal[length_min:length_max],
                                self.ncsimul_y_normal[length_min:length_max])
        distance, paths = dtw.warping_paths(
            self.new_real_normal[length_min:length_max],
            self.ncsimul_y_normal[length_min:length_max])

        dtwvis.plot_warping(self.new_real_normal[length_min:length_max],
                            self.ncsimul_y_normal[length_min:length_max],
                            path,
                            filename="warp" + str(self.test) + ".png")

        best_path = dtw.best_path(paths)
        dtwvis.plot_warpingpaths(self.new_real_normal[length_min:length_max],
                                 self.ncsimul_y_normal[length_min:length_max],
                                 paths,
                                 best_path,
                                 filename="best_path" + str(self.test) +
                                 ".png")
Exemplo n.º 15
0
def main():
    """Demo: warp two small series, plot them, print their DTW distance and
    draw the warping-paths matrix (window=3, psi=2)."""
    s1 = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0, 0, 2, 1, 0, 0])
    s2 = np.array([0., 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0])
    dtwvis.plot_warping(s1, s2, dtw.warping_path(s1, s2))

    # Figure 1: both raw series stacked on top of each other.
    plt.figure(1)
    plt.subplot(211)
    plt.title('Timeseries: s1 & s2')
    plt.plot(s1)
    plt.subplot(212)
    plt.plot(s2)
    plt.show()

    print(dtw.distance(s1, s2))

    # Figure 2: the cost matrix together with its best path.
    plt.figure(2)
    _, paths = dtw.warping_paths(s1, s2, window=3, psi=2)
    dtwvis.plot_warpingpaths(s1, s2, paths, dtw.best_path(paths))
Exemplo n.º 16
0
def prep_dtw(y, y_, min, max, file_):
    """Run DTW on the ``[min:max]`` slice of two series and save two plots.

    :param y: first series (must support ``len`` and slicing)
    :param y_: second series (must support ``len`` and slicing)
    :param min: start index of the compared slice (the name shadows the
        builtin but is part of the existing signature)
    :param max: end index (exclusive) of the compared slice; both inputs
        must be at least this long
    :param file_: prefix for the output files "<file_>warp_results.png" and
        "<file_>best_path_results.png"
    :return: ``(path, distance)`` -- the warping path and the DTW distance
    :raises NameError: when either input is shorter than ``max`` or its
        length cannot be compared with ``max`` (exception type and message
        kept from the original code)
    """
    # The original evaluated this comparison and threw the result away, so
    # the check never fired; the result is now acted upon.
    try:
        long_enough = len(y) >= max and len(y_) >= max
    except TypeError as err:
        raise NameError(
            'the maximum lengh not respects lenght of inputs') from err
    if not long_enough:
        raise NameError('the maximum lengh not respects lenght of inputs')

    segment = y[min:max]
    segment_ = y_[min:max]

    path = dtw.warping_path(segment, segment_)
    distance, paths = dtw.warping_paths(segment, segment_)
    dtwvis.plot_warping(segment,
                        segment_,
                        path,
                        filename=file_ + "warp_results.png")

    best_path = dtw.best_path(paths)
    dtwvis.plot_warpingpaths(segment,
                             segment_,
                             paths,
                             best_path,
                             filename=file_ + "best_path_results.png")

    return path, distance
from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis
import pandas as pd

# Compute the dynamic time warping distance between the log-return time
# series of two stocks.

path1 = "000001.XSHE.csv"
path2 = "000063.XSHE.csv"
feature = "rclose"
length = 20
window = 10
psi = 5

# Only the first `length` rows of the chosen column are compared.
rc1 = pd.read_csv(path1)[feature][:length]
rc2 = pd.read_csv(path2)[feature][:length]
dis, paths = dtw.warping_paths(rc1, rc2, window=window, psi=psi)

# The dynamic time warping distance
print(dis)

# Plot (shown rather than saved)
best_path = dtw.best_path(paths)
dtwvis.plot_warpingpaths(rc1, rc2, paths, best_path, shownumbers=True)

# Plot (saved to a file)
path = dtw.warping_path(rc1, rc2)
dtwvis.plot_warping(rc1, rc2, path, filename="wrapping.png")
Exemplo n.º 18
0
                    x = series[comparison]

                    xallm = []
                    tallm = []


                    for n in nodes:

                        if n < 3687:



                            y  = series[int(n)]

                            map_x, map_y = list(zip(*dtw.warping_path(x, y)))

                            map_x = np.asarray(map_x)
                            map_y = np.asarray(map_y)

                            if len(y):
                                maxl = len(y)




                            xallm += list(y[map_y])
                            tallm += range(len(map_y))

                            if i == 1:
                                maxl  = len(x)
Exemplo n.º 19
0
        except IndexError:
            #get median offset to apply to match spacecraft
            off_speed = p_mat.SPEED.median() - t_mat.SPEED.median()
            p_mat.SPEED = p_mat.SPEED - off_speed

    #get dynamic time warping value
    print('WARPING TIME')
    print(par)
    #dist, cost, path = mlpy.dtw_std(t_mat[par[0]].ffill().bfill().values,p_mat[par[0]].ffill().bfill().values,dist_only=False)
    #changed to dtwp that allows penalty for compression (i.e. prevent long stretches of the same value 2018/04/12 J. Prchlik
    #penalty = np.abs(p_mat[par[0]].median()-t_mat[par[0]].median())
    if 'SPEED' in par: penalty = 15.0
    elif any('B' in s for s in par): penalty = .2
    print('Penalty = {0:4.3f}'.format(penalty))
    path = dtw.warping_path(t_mat[par[0]].ffill().bfill().values,
                            p_mat[par[0]].ffill().bfill().values,
                            penalty=penalty)
    #put in previous path
    path = np.array(path).T
    print('STOP WARPING TIME')

    #get full offsets for dynamic time warping
    off_sol = (p_mat.iloc[path[1], :].index - t_mat.iloc[path[0], :].index)
    print('REINDEXED')

    #get a region around one of the best fit times
    b_mat = p_mat.copy()

    #update the time index of the match array for comparision with training spacecraft (i=training spacecraft time)
    b_mat = b_mat.reindex(b_mat.iloc[path[1], :].index).interpolate('time')
    b_mat.index = b_mat.index - off_sol
Exemplo n.º 20
0
#dynamic time warping
import stampProcessor
import numpy as np

mode = 'propagationDelayCorrection'
coarseTau = 10000
shift = -51318536.0
# Load the two timestamp streams (Alice and Bob) and pass each through
# stampProcessor.timebin with the same coarseTau.
timeStampAlice = np.load('../data/aliceBobtimeStampAlice.npy')
timeStampBob = np.load('../data/aliceBobtimeStampBob.npy')
coarseTimebinAlice = stampProcessor.timebin(coarseTau, timeStampAlice)
coarseTimebinBob = stampProcessor.timebin(coarseTau, timeStampBob)

# Compare the same index window of both streams. s2 used to be sliced from
# Alice's data as well, which warped the series against itself and left
# coarseTimebinBob unused.
s1 = coarseTimebinAlice[500000:505000]
s2 = coarseTimebinBob[500000:505000]
print('len(x)', len(s1))
print('len(y)', len(s2))

from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis

# Picture 1: the warping path between the two windows.
path = dtw.warping_path(s1, s2)
dtwvis.plot_warping(s1, s2, path, filename="warp.png")

# Picture 2: the cost matrix (window=25, psi=2) with its best path.
d, paths = dtw.warping_paths(s1, s2, window=25, psi=2)
best_path = dtw.best_path(paths)
dtwvis.plot_warpingpaths(s1, s2, paths, best_path, filename="path.png")