def dtw_path_averaging(sequence_a, sequence_b, weight_a=1, weight_b=1, path=None, shrink=True,
                       dtw_function=dtw_std):
    """
    Computes the weighted average of two sequences along their DTW warping path.

    Every pair of indices `(i, j)` on the path contributes one point,
    `(sequence_a[i] * weight_a + sequence_b[j] * weight_b) / (weight_a + weight_b)`.

    :param sequence_a: first sequence to be averaged
    :param sequence_b: second sequence to be averaged
    :param weight_a: weight of the first sequence
    :param weight_b: weight of the second sequence
    :param path: mapped path between the sequences, as a pair of index sequences
        (indices into `sequence_a`, indices into `sequence_b`).
        Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the averaged path is shrunk (via `uniform_shrinking_to_length`)
        to the length of the longer of the two NaN-stripped input sequences
    :param dtw_function: function that computes the DTW path between two sequences; it is called as
        `dtw_function(sequence_a, sequence_b, dist_only=False)` and must return a
        `(distance, cost, path)` triple
    :return: numpy array holding the averaged sequence
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        # Only the path is needed here; distance and cost matrix are discarded
        _, _, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    indices_a, indices_b = path
    total_weight = weight_a + weight_b

    blended_points = []
    for index_a, index_b in zip(indices_a, indices_b):
        weighted_sum = sequence_a[index_a] * weight_a + sequence_b[index_b] * weight_b
        blended_points.append(weighted_sum / total_weight)
    averaged_path = np.array(blended_points)

    if not shrink:
        return averaged_path

    target_length = max(len(_strip_nans(sequence_a)), len(_strip_nans(sequence_b)))
    return uniform_shrinking_to_length(averaged_path, target_length)
def sdtw_averaging(sequence_a, sequence_b, weight_a, weight_b, path=None, shrink=True,
                   dtw_function=dtw_std):
    """
    Implements Scaled Dynamic Time Warping Path Averaging as described in [#Niennattrakul:2009ep]

    .. [#Niennattrakul:2009ep] Vit Niennattrakul and Chotirat Ann Ratanamahatana "Shape averaging under Time Warping",
       2009 6th International Conference on Electrical Engineering/Electronics, Computer,
       Telecommunications and Information Technology (ECTI-CON)

    Each point on the warping path yields one weighted-average value, which is then repeated
    a number of times that depends on the direction of the step that led to it.

    :param sequence_a: sequence A
    :param sequence_b: sequence B
    :param weight_a: weight of sequence A. It is also used as a repetition count,
        so it has to be an integer
    :param weight_b: weight of sequence B. It is also used as a repetition count,
        so it has to be an integer
    :param path: computed mapped path between the sequences, as a pair of index sequences.
        Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the data will be shrunk to the length of the longer
        of the two NaN-stripped input sequences
    :param dtw_function: function that computes the DTW path between two sequences; it is called as
        `dtw_function(sequence_a, sequence_b, dist_only=False)` and must return a
        `(distance, cost, path)` triple
    :return: numpy float array holding the averaged sequence
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        _, _, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    total_weight = weight_a + weight_b
    # The paper does not explicitly say how to round this
    diagonal_coefficient = int(total_weight / 2.0)

    averaged_path = []
    prev = None

    # Builtin `zip` (rather than the Python-2-only `izip`) pairs up the two index sequences
    for a, b in zip(path[0], path[1]):
        item = (weight_a * sequence_a[a] + weight_b * sequence_b[b]) / total_weight

        if prev is None:
            # First point of the path is treated like a diagonal step
            extension_coefficient = diagonal_coefficient
        elif prev[0] == a:
            # The path moved from (i,j-1) to (i,j)
            extension_coefficient = weight_a
        elif prev[1] == b:
            # The path moved from (i-1,j) to (i,j)
            extension_coefficient = weight_b
        else:
            # Path moved diagonally from (i-1,j-1) to (i,j)
            extension_coefficient = diagonal_coefficient

        averaged_path.extend([item] * extension_coefficient)
        prev = (a, b)

    averaged_path = np.asarray(averaged_path, dtype=float)

    if shrink:
        averaged_path = uniform_shrinking_to_length(
            averaged_path,
            max(len(_strip_nans(sequence_a)), len(_strip_nans(sequence_b))))

    return averaged_path
def uniform_scaling_to_length(sequence, desired_length, output_scaling_path=False):
    """
    Uniform scaling procedure, similar to the one provided in [#yankov2007]

    .. [#yankov2007] D Yankov, E Keogh, J Medina, and B Chiu,
       "Detecting time series motifs under uniform scaling", 2007

    The sequence is first passed through `_strip_nans`. Output point `i` is then taken from
    position `floor(i * current_length / desired_length)` of the stripped sequence.

    :param sequence: sequence to stretch
    :param desired_length: number of points in the output; must not be smaller than the
        length of the stripped sequence
    :param output_scaling_path: if set to true, the source index used for each output point
        is returned as well
    :return: the stripped sequence itself when it already has `desired_length` points, otherwise a
        list of `desired_length` points. With `output_scaling_path` a `(scaled, indices)` pair is
        returned instead, where `indices` is `range(desired_length)` in the unchanged-length case
        and a numpy array otherwise
    :raises ValueError: if the stripped sequence is empty or longer than `desired_length`
    """
    sequence = _strip_nans(sequence)
    current_len = len(sequence)

    if current_len == 0:
        raise ValueError('Empty sequence cannot be extended')

    if desired_length < current_len:
        raise ValueError('Desired length is smaller than current length: {0} < {1}'.format(desired_length,
                                                                                             current_len))

    if desired_length == current_len:
        # Nothing to stretch: every point maps onto itself
        if output_scaling_path:
            return sequence, range(desired_length)
        return sequence

    scaling_factor = float(current_len) / desired_length
    source_indices = [int(floor(position * scaling_factor)) for position in range(desired_length)]
    rescaled_sequence = [sequence[source_index] for source_index in source_indices]

    if output_scaling_path:
        return rescaled_sequence, np.asarray(source_indices)
    return rescaled_sequence
def test_multi_dimension_with_strip(self):
    """Checks that `_strip_nans` drops the trailing NaN-only rows of a multi-dimensional sequence."""
    nan = np.nan
    padded_columns = [[1, 2, 3, nan, nan, nan],
                      [7, 8, 9, nan, nan, nan],
                      [13, 14, 15, nan, nan, nan]]
    expected_columns = [[1, 2, 3],
                        [7, 8, 9],
                        [13, 14, 15]]

    # Transposed so that each row is one time point and each column one dimension
    padded = np.array(padded_columns, dtype=float).T
    expected = np.array(expected_columns, dtype=float).T

    assert_array_equal(expected, _strip_nans(padded))
def uniform_scaling_to_length(sequence, desired_length, output_scaling_path=False):
    """
    Uniform scaling procedure, similar to the one provided in [#yankov2007]

    .. [#yankov2007] D Yankov, E Keogh, J Medina, and B Chiu,
       "Detecting time series motifs under uniform scaling", 2007

    The sequence is first passed through `_strip_nans`. Output point `i` is then taken from
    position `floor(i * current_length / desired_length)` of the stripped sequence.

    :param sequence: sequence to stretch
    :param desired_length: number of points in the output; must not be smaller than the
        length of the stripped sequence
    :param output_scaling_path: if set to true, the source index used for each output point
        is returned as well
    :return: the stripped sequence itself when it already has `desired_length` points, otherwise a
        list of `desired_length` points. With `output_scaling_path` a `(scaled, indices)` pair is
        returned instead, where `indices` is `range(desired_length)` in the unchanged-length case
        and a numpy array otherwise
    :raises ValueError: if the stripped sequence is empty or longer than `desired_length`
    """
    sequence = _strip_nans(sequence)
    current_len = len(sequence)

    if current_len == 0:
        raise ValueError('Empty sequence cannot be extended')

    if desired_length < current_len:
        raise ValueError('Desired length is smaller than current length: {0} < {1}'.format(desired_length,
                                                                                             current_len))

    if desired_length == current_len:
        # Nothing to stretch: every point maps onto itself
        if output_scaling_path:
            return sequence, range(desired_length)
        return sequence

    scaling_factor = float(current_len) / desired_length
    source_indices = [int(floor(position * scaling_factor)) for position in range(desired_length)]
    rescaled_sequence = [sequence[source_index] for source_index in source_indices]

    if output_scaling_path:
        return rescaled_sequence, np.asarray(source_indices)
    return rescaled_sequence
def dtw_path_averaging(sequence_a, sequence_b, weight_a=1, weight_b=1, path=None, shrink=True,
                       dtw_function=dtw_std):
    """
    Computes the weighted average of two sequences along their DTW warping path.

    Every pair of indices `(i, j)` on the path contributes one point,
    `(sequence_a[i] * weight_a + sequence_b[j] * weight_b) / (weight_a + weight_b)`.

    :param sequence_a: first sequence to be averaged
    :param sequence_b: second sequence to be averaged
    :param weight_a: weight of the first sequence
    :param weight_b: weight of the second sequence
    :param path: mapped path between the sequences, as a pair of index sequences
        (indices into `sequence_a`, indices into `sequence_b`).
        Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the averaged path is shrunk (via `uniform_shrinking_to_length`)
        to the length of the longer of the two NaN-stripped input sequences
    :param dtw_function: function that computes the DTW path between two sequences; it is called as
        `dtw_function(sequence_a, sequence_b, dist_only=False)` and must return a
        `(distance, cost, path)` triple
    :return: numpy array holding the averaged sequence
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        # Only the path is needed here; distance and cost matrix are discarded
        _, _, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    indices_a, indices_b = path
    total_weight = weight_a + weight_b

    blended_points = []
    for index_a, index_b in zip(indices_a, indices_b):
        weighted_sum = sequence_a[index_a] * weight_a + sequence_b[index_b] * weight_b
        blended_points.append(weighted_sum / total_weight)
    averaged_path = np.array(blended_points)

    if not shrink:
        return averaged_path

    target_length = max(len(_strip_nans(sequence_a)), len(_strip_nans(sequence_b)))
    return uniform_shrinking_to_length(averaged_path, target_length)
def uniform_shrinking_to_length(sequence, desired_length):
    """
    Shrinks the sequence to `desired_length` points by averaging neighbouring points.

    The sequence is converted to a float numpy array and passed through `_strip_nans` first.
    With `shrink_factor = current_length / desired_length`, output point `i` is the weighted mean
    of the input over the window `[i * shrink_factor, (i + 1) * shrink_factor)`. Input points
    that are only partially covered by the window are weighted by the covered fraction.

    :param sequence: sequence to shrink
    :param desired_length: number of points in the output; must be positive and not greater than
        the length of the stripped sequence
    :return: the stripped sequence itself when it already has `desired_length` points,
        otherwise a new numpy array with `desired_length` points
    :raises ValueError: if the stripped sequence is empty, shorter than `desired_length`,
        or if `desired_length` is not positive
    """
    EPSILON = 1e-6

    sequence = np.asarray(sequence, dtype=float)
    sequence = _strip_nans(sequence)

    current_length = len(sequence)

    if current_length == 0:
        raise ValueError('Cannot shrink sequence of length 0')
    elif current_length < desired_length:
        raise ValueError('Desired length greater than current length: {0} > {1}'.format(desired_length,
                                                                                          current_length))
    elif current_length == desired_length:
        return sequence

    if desired_length <= 0:
        raise ValueError('Invalid length desired: {0}'.format(desired_length))

    # This is essentially how many points in the current sequence will be mapped to a single point in the new one
    shrink_factor = float(current_length) / desired_length

    # One-dimensional input has no second axis; treat that as "no extra dimensions"
    try:
        ndim = sequence.shape[1]
    except IndexError:
        ndim = 0

    if ndim == 0:
        new_sequence = np.empty(desired_length)
    else:
        new_sequence = np.empty((desired_length, ndim))

    for i in range(desired_length):
        start = i * shrink_factor
        end = (i + 1) * shrink_factor

        s = 0  # weighted sum of the points falling into the window
        d = 0  # total weight accumulated, should end up equal to shrink_factor

        left_bound = int(floor(start))
        if fabs(start - left_bound) <= EPSILON:
            # Window starts (numerically) on a point boundary: the whole point is covered
            left_bound_input = 1
        else:
            left_bound_input = ceil(start) - start

        if left_bound_input >= EPSILON:
            s += sequence[left_bound] * left_bound_input
            d += left_bound_input

        right_bound = int(floor(end))
        right_bound_input = end - floor(end)

        if right_bound_input >= EPSILON:  # Epsilon to prevent rounding errors interfering
            s += sequence[right_bound] * right_bound_input
            d += right_bound_input

        # Points strictly between the two boundary points are fully covered.
        # `range` is used for consistency with the outer loop (`xrange` exists on Python 2 only).
        for j in range(left_bound + 1, right_bound):
            s += sequence[j]
            d += 1.0

        # Internal sanity check: the weights of one window must add up to the shrink factor
        assert (abs(d - shrink_factor) < 0.000001)
        new_sequence[i] = s / d

    return new_sequence
def test_single_dimension_with_strip(self):
    """Checks that `_strip_nans` drops the trailing NaNs of a one-dimensional sequence."""
    padded = np.array([1, 2, 3, 4, 5, np.nan, np.nan], dtype=float)
    expected = np.array([1, 2, 3, 4, 5], dtype=float)

    stripped = _strip_nans(padded)

    assert_array_equal(expected, stripped)
def test_more_dims_no_strip_needed(self):
    """Checks that `_strip_nans` leaves a NaN-free multi-dimensional sequence unchanged."""
    columns = [[1, 2, 3, 4, 5, 6],
               [7, 8, 9, 10, 11, 12],
               [13, 14, 15, 16, 17, 18]]
    # Transposed so that each row is one time point and each column one dimension
    x = np.array(columns, dtype=float).T

    stripped = _strip_nans(x)

    assert_array_equal(x, stripped)
def uniform_shrinking_to_length(sequence, desired_length):
    """
    Shrinks the sequence to `desired_length` points by averaging neighbouring points.

    The sequence is converted to a float numpy array and passed through `_strip_nans` first.
    With `shrink_factor = current_length / desired_length`, output point `i` is the weighted mean
    of the input over the window `[i * shrink_factor, (i + 1) * shrink_factor)`. Input points
    that are only partially covered by the window are weighted by the covered fraction.

    :param sequence: sequence to shrink
    :param desired_length: number of points in the output; must be positive and not greater than
        the length of the stripped sequence
    :return: the stripped sequence itself when it already has `desired_length` points,
        otherwise a new numpy array with `desired_length` points
    :raises ValueError: if the stripped sequence is empty, shorter than `desired_length`,
        or if `desired_length` is not positive
    """
    EPSILON = 1e-6

    sequence = np.asarray(sequence, dtype=float)
    sequence = _strip_nans(sequence)

    current_length = len(sequence)

    if current_length == 0:
        raise ValueError('Cannot shrink sequence of length 0')
    elif current_length < desired_length:
        raise ValueError(
            'Desired length greater than current length: {0} > {1}'.format(
                desired_length, current_length))
    elif current_length == desired_length:
        return sequence

    if desired_length <= 0:
        raise ValueError('Invalid length desired: {0}'.format(desired_length))

    # This is essentially how many points in the current sequence will be mapped to a single point in the new one
    shrink_factor = float(current_length) / desired_length

    # One-dimensional input has no second axis; treat that as "no extra dimensions"
    try:
        ndim = sequence.shape[1]
    except IndexError:
        ndim = 0

    if ndim == 0:
        new_sequence = np.empty(desired_length)
    else:
        new_sequence = np.empty((desired_length, ndim))

    for i in range(desired_length):
        start = i * shrink_factor
        end = (i + 1) * shrink_factor

        s = 0  # weighted sum of the points falling into the window
        d = 0  # total weight accumulated, should end up equal to shrink_factor

        left_bound = int(floor(start))
        if fabs(start - left_bound) <= EPSILON:
            # Window starts (numerically) on a point boundary: the whole point is covered
            left_bound_input = 1
        else:
            left_bound_input = ceil(start) - start

        if left_bound_input >= EPSILON:
            s += sequence[left_bound] * left_bound_input
            d += left_bound_input

        right_bound = int(floor(end))
        right_bound_input = end - floor(end)

        if right_bound_input >= EPSILON:  # Epsilon to prevent rounding errors interfering
            s += sequence[right_bound] * right_bound_input
            d += right_bound_input

        # Points strictly between the two boundary points are fully covered.
        # `range` is used for consistency with the outer loop (`xrange` exists on Python 2 only).
        for j in range(left_bound + 1, right_bound):
            s += sequence[j]
            d += 1.0

        # Internal sanity check: the weights of one window must add up to the shrink factor
        assert (abs(d - shrink_factor) < 0.000001)
        new_sequence[i] = s / d

    return new_sequence
def test_multi_dimension_with_strip(self):
    """Checks that `_strip_nans` drops the trailing NaN-only rows of a multi-dimensional sequence."""
    nan = np.nan
    padded_columns = [[1, 2, 3, nan, nan, nan],
                      [7, 8, 9, nan, nan, nan],
                      [13, 14, 15, nan, nan, nan]]
    expected_columns = [[1, 2, 3],
                        [7, 8, 9],
                        [13, 14, 15]]

    # Transposed so that each row is one time point and each column one dimension
    padded = np.array(padded_columns, dtype=float).T
    expected = np.array(expected_columns, dtype=float).T

    assert_array_equal(expected, _strip_nans(padded))
def test_single_dimension_with_strip(self):
    """Checks that `_strip_nans` drops the trailing NaNs of a one-dimensional sequence."""
    padded = np.array([1, 2, 3, 4, 5, np.nan, np.nan], dtype=float)
    expected = np.array([1, 2, 3, 4, 5], dtype=float)

    stripped = _strip_nans(padded)

    assert_array_equal(expected, stripped)
def test_more_dims_no_strip_needed(self):
    """Checks that `_strip_nans` leaves a NaN-free multi-dimensional sequence unchanged."""
    columns = [[1, 2, 3, 4, 5, 6],
               [7, 8, 9, 10, 11, 12],
               [13, 14, 15, 16, 17, 18]]
    # Transposed so that each row is one time point and each column one dimension
    x = np.array(columns, dtype=float).T

    stripped = _strip_nans(x)

    assert_array_equal(x, stripped)
def test_single_dimension_no_strip_needed(self):
    """Checks that `_strip_nans` leaves a NaN-free one-dimensional sequence unchanged."""
    values = [1, 2, 3, 4, 5, 6]
    x = np.array(values, dtype=float)

    stripped = _strip_nans(x)

    assert_array_equal(x, stripped)
def sdtw_averaging(sequence_a, sequence_b, weight_a, weight_b, path=None, shrink=True,
                   dtw_function=dtw_std):
    """
    Implements Scaled Dynamic Time Warping Path Averaging as described in [#Niennattrakul:2009ep]

    .. [#Niennattrakul:2009ep] Vit Niennattrakul and Chotirat Ann Ratanamahatana "Shape averaging under Time Warping",
       2009 6th International Conference on Electrical Engineering/Electronics, Computer,
       Telecommunications and Information Technology (ECTI-CON)

    Each point on the warping path yields one weighted-average value, which is then repeated
    a number of times that depends on the direction of the step that led to it.

    :param sequence_a: sequence A
    :param sequence_b: sequence B
    :param weight_a: weight of sequence A. It is also used as a repetition count,
        so it has to be an integer
    :param weight_b: weight of sequence B. It is also used as a repetition count,
        so it has to be an integer
    :param path: computed mapped path between the sequences, as a pair of index sequences.
        Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the data will be shrunk to the length of the longer
        of the two NaN-stripped input sequences
    :param dtw_function: function that computes the DTW path between two sequences; it is called as
        `dtw_function(sequence_a, sequence_b, dist_only=False)` and must return a
        `(distance, cost, path)` triple
    :return: numpy float array holding the averaged sequence
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        _, _, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    total_weight = weight_a + weight_b
    # The paper does not explicitly say how to round this
    diagonal_coefficient = int(total_weight / 2.0)

    averaged_path = []
    prev = None

    # Builtin `zip` (rather than the Python-2-only `izip`) pairs up the two index sequences
    for a, b in zip(path[0], path[1]):
        item = (weight_a * sequence_a[a] + weight_b * sequence_b[b]) / total_weight

        if prev is None:
            # First point of the path is treated like a diagonal step
            extension_coefficient = diagonal_coefficient
        elif prev[0] == a:
            # The path moved from (i,j-1) to (i,j)
            extension_coefficient = weight_a
        elif prev[1] == b:
            # The path moved from (i-1,j) to (i,j)
            extension_coefficient = weight_b
        else:
            # Path moved diagonally from (i-1,j-1) to (i,j)
            extension_coefficient = diagonal_coefficient

        averaged_path.extend([item] * extension_coefficient)
        prev = (a, b)

    averaged_path = np.asarray(averaged_path, dtype=float)

    if shrink:
        averaged_path = uniform_shrinking_to_length(
            averaged_path,
            max(len(_strip_nans(sequence_a)), len(_strip_nans(sequence_b))))

    return averaged_path
def test_single_dimension_no_strip_needed(self):
    """Checks that `_strip_nans` leaves a NaN-free one-dimensional sequence unchanged."""
    values = [1, 2, 3, 4, 5, 6]
    x = np.array(values, dtype=float)

    stripped = _strip_nans(x)

    assert_array_equal(x, stripped)
def dtw_std(x, y, metric='sqeuclidean', dist_only=True, constraint=None, k=None, try_reverse=True, normalise=False,
            scale_first=False, *args, **kwargs):
    """
    Wrapper around MLPY's dtw_std that supports cleaning up of NaNs, and reversing of strings.

    :param x: first sequence
    :param y: second sequence
    :param metric: dtw metric to use `sqeuclidean`, `euclidean` or `cosine`
    :param dist_only: return distance only
    :param constraint: constraint of dtw (try `None` or `'slanted_band'`)
    :param k: parameter k needed for slanted band constraint
    :param try_reverse: Will try reversing one sequence as to get a better distance.
        The reversed alignment is only used when its distance is strictly smaller
    :param normalise: If set to true, distance will be divided by the length of the longer
        (NaN-stripped) sequence
    :param scale_first: If set to true, the shorter sequence will be scaled to the length of the longer sequence
        before DTW. Returned paths are mapped back to indices of the unscaled sequences
    :param args: extra positional arguments passed on to `mlpy_dtw_std`
    :param kwargs: extra keyword arguments passed on to `mlpy_dtw_std`
    :return: the distance if `dist_only` is set, otherwise a `(distance, cost, path)` triple
    """
    def _normalise(ans, max_len):
        # Divides the distance by the longer sequence length, if normalisation was requested
        if normalise:
            return ans / max_len
        else:
            return ans

    def _scaled_path(path, scaling_path, flip_paths):
        # Maps indices of the scaled sequence back to the unscaled one and
        # undoes the x/y swap that was made before scaling
        path_x = np.asarray([scaling_path[i] for i in path[0]])
        path_y = path[1]

        if flip_paths:
            path = (path_y, path_x)
        else:
            path = (path_x, path_y)

        return path

    def _reverse_path(path):
        # Mirrors path indices so that they refer to the non-reversed sequence
        n = path.max()
        path = n - path
        return path

    # Builtin `float` instead of the `np.float` alias, which newer NumPy versions no longer provide
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x = _strip_nans(x)
    y = _strip_nans(y)

    max_len = max(len(x), len(y))
    if scale_first:
        # Make sure x is the sequence that gets stretched, remembering whether it was swapped
        if len(x) >= len(y):
            x, y = y, x
            flip_paths = True
        else:
            flip_paths = False

        x, scaling_path = uniform_scaling_to_length(x, len(y), output_scaling_path=True)

    regular_ans = mlpy_dtw_std(x, y, metric=metric, dist_only=dist_only, constraint=constraint, k=k, *args, **kwargs)
    if not try_reverse:
        if dist_only:
            return _normalise(regular_ans, max_len)
        else:
            dist, cost, path = regular_ans
            dist = _normalise(dist, max_len)

            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)

            return dist, cost, path
    else:
        reverse_ans = mlpy_dtw_std(reverse_sequence(x), y, metric=metric, dist_only=dist_only, constraint=constraint,
                                   k=k, *args, **kwargs)
        if dist_only:
            return _normalise(min(regular_ans, reverse_ans), max_len)
        elif reverse_ans[0] >= regular_ans[0]:
            dist, cost, path = regular_ans
            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return _normalise(dist, max_len), cost, path
        else:  # dist_only = False and reverse_ans is smaller
            dist, cost, path = reverse_ans
            path_rev = (_reverse_path(path[0]), path[1])

            if scale_first:
                path_rev = _scaled_path(path_rev, scaling_path, flip_paths)

            cost = np.fliplr(cost)
            return _normalise(dist, max_len), cost, path_rev