Python _strip_nans示例，dgw.dtw.utilities._strip_nans Python示例

示例#1

0

显示文件

文件： transformations.py 项目： lukauskas/dgw

def dtw_path_averaging(sequence_a, sequence_b, weight_a=1, weight_b=1, path=None, shrink=True, dtw_function=dtw_std):
    """
    Compute the weighted average of two sequences along their DTW alignment path.

    Every pair of aligned indices ``(i, j)`` on the path contributes one point:
    ``(sequence_a[i] * weight_a + sequence_b[j] * weight_b) / (weight_a + weight_b)``.

    :param sequence_a: first sequence to be averaged (converted to a float array)
    :param sequence_b: second sequence to be averaged (converted to a float array)
    :param weight_a: weight of the first sequence
    :param weight_b: weight of the second sequence
    :param path: pair ``(indices_into_a, indices_into_b)`` describing the alignment.
        Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the averaged path is shrunk with `uniform_shrinking_to_length`
        to the length of the longer input sequence (lengths measured after `_strip_nans`)
    :param dtw_function: function that computes the DTW path between two sequences. It is called as
        ``dtw_function(sequence_a, sequence_b, dist_only=False)`` and must return
        ``(distance, cost, path)``, e.g. see `parametrised_dtw_wrapper`
    :return: the averaged sequence as a numpy array
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        # Only the alignment is needed here; distance and cost matrix are discarded
        _, _, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    indices_a, indices_b = path

    averaged_points = []
    for idx_a, idx_b in zip(indices_a, indices_b):
        weighted_sum = sequence_a[idx_a] * weight_a + sequence_b[idx_b] * weight_b
        averaged_points.append(weighted_sum / (weight_a + weight_b))
    averaged_path = np.array(averaged_points)

    if not shrink:
        return averaged_path

    target_length = max(len(_strip_nans(sequence_a)), len(_strip_nans(sequence_b)))
    return uniform_shrinking_to_length(averaged_path, target_length)

示例#2

0

显示文件

文件： transformations.py 项目： lukauskas/dgw

def sdtw_averaging(sequence_a, sequence_b, weight_a, weight_b, path=None, shrink=True, dtw_function=dtw_std):
    """
    Implements Scaled Dynamic Time Warping Path Averaging as described in [#Niennattrakul:2009ep]

    .. [#Niennattrakul:2009ep] Vit Niennattrakul and Chotirat Ann Ratanamahatana "Shape averaging under Time Warping",
       2009 6th International Conference on Electrical Engineering/Electronics, Computer, Telecommunications and Information Technology (ECTI-CON)

    Each aligned pair on the path yields one weighted-average point, which is repeated a number of
    times that depends on the direction of the step that reached it.

    :param sequence_a: sequence A
    :param sequence_b: sequence B
    :param weight_a: weight of sequence A. Used as a list repetition count, so it has to be an integer
    :param weight_b: weight of sequence B. Used as a list repetition count, so it has to be an integer
    :param path: computed mapped path between the sequences. Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the result is shrunk with `uniform_shrinking_to_length` to the length
        of the longer input sequence (lengths measured after `_strip_nans`)
    :param dtw_function: function that computes the DTW path between two sequences. It is called as
        ``dtw_function(sequence_a, sequence_b, dist_only=False)`` and must return ``(distance, cost, path)``
    :return: the averaged sequence as a float numpy array
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        distance, cost, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    # Rezip this for easier traversal. The builtin zip is used instead of itertools.izip:
    # for a single pass it yields the same pairs, and it also exists on Python 3.
    path = zip(path[0], path[1])

    averaged_path = []

    prev = None

    total_weight = weight_a + weight_b
    diagonal_coefficient = int(total_weight / 2.0)  # The paper does not explicitly say how to round this
    for a, b in path:

        item = (weight_a * sequence_a[a] + weight_b * sequence_b[b]) / total_weight
        if prev is None:
            # The first point of the path is treated like a diagonal step
            extension_coefficient = diagonal_coefficient
        elif prev[0] == a:  # The path moved from (i,j-1) to (i,j)
            extension_coefficient = weight_a
        elif prev[1] == b:  # The path moved from (i-1,j) to (i,j)
            extension_coefficient = weight_b
        else:  # Path moved diagonally from (i-1,j-1) to (i,j)
            extension_coefficient = diagonal_coefficient

        averaged_path.extend([item] * extension_coefficient)
        prev = (a, b)

    averaged_path = np.asarray(averaged_path, dtype=float)
    if shrink:
        averaged_path = uniform_shrinking_to_length(averaged_path, max(len(_strip_nans(sequence_a)),
                                                                       len(_strip_nans(sequence_b))))
    return averaged_path

示例#3

0

显示文件

文件： scaling.py 项目： lukauskas/dgw

def uniform_scaling_to_length(sequence, desired_length, output_scaling_path=False):
    """
    Stretch a sequence to `desired_length` points by uniform scaling, similar to the procedure in [#yankov2007]
    .. [#yankov2007] D Yankov, E Keogh, J Medina, and B Chiu, "Detecting time series motifs under uniform scaling", 2007

    Output position ``i`` takes the input element at index ``floor(i * current_len / desired_length)``.

    :param sequence: sequence to stretch; it is passed through `_strip_nans` first
    :param desired_length: number of points in the output; must not be smaller than the stripped length
    :param output_scaling_path: if true, also return the input index used for every output position
    :return: the rescaled sequence, or a ``(rescaled sequence, scaling path)`` pair when
        `output_scaling_path` is set. When the stripped sequence already has `desired_length`
        points it is returned as is (with ``range(desired_length)`` as the scaling path)
    :raises ValueError: if the stripped sequence is empty or longer than `desired_length`
    """
    sequence = _strip_nans(sequence)
    current_len = len(sequence)

    if current_len == 0:
        raise ValueError('Empty sequence cannot be extended')
    if desired_length < current_len:
        raise ValueError('Desired length is smaller than current length: {0} < {1}'.format(desired_length, current_len))
    if desired_length == current_len:
        # Nothing to stretch: the scaling path is the identity mapping
        return (sequence, range(desired_length)) if output_scaling_path else sequence

    scaling_factor = float(current_len) / desired_length

    # Input index feeding each output position
    source_indices = [int(floor(position * scaling_factor)) for position in range(desired_length)]
    rescaled_sequence = [sequence[index] for index in source_indices]

    if output_scaling_path:
        return rescaled_sequence, np.asarray(source_indices)
    return rescaled_sequence

示例#4

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_multi_dimension_with_strip(self):
     # Three dimensions over six time points; the last three time points are NaN in every dimension
     nan = np.nan
     padded_rows = [[1, 2, 3, nan, nan, nan],
                    [7, 8, 9, nan, nan, nan],
                    [13, 14, 15, nan, nan, nan]]
     expected_rows = [[1, 2, 3],
                      [7, 8, 9],
                      [13, 14, 15]]

     # Transposed so that time runs along the first axis
     padded = np.array(padded_rows, dtype=float).T
     expected = np.array(expected_rows, dtype=float).T

     assert_array_equal(expected, _strip_nans(padded))

示例#5

0

显示文件

def uniform_scaling_to_length(sequence,
                              desired_length,
                              output_scaling_path=False):
    """
    Stretch a sequence to `desired_length` points by uniform scaling, similar to the procedure in [#yankov2007]
    .. [#yankov2007] D Yankov, E Keogh, J Medina, and B Chiu, "Detecting time series motifs under uniform scaling", 2007

    Output position ``i`` takes the input element at index ``floor(i * current_len / desired_length)``.

    :param sequence: sequence to stretch; it is passed through `_strip_nans` first
    :param desired_length: number of points in the output; must not be smaller than the stripped length
    :param output_scaling_path: if true, also return the input index used for every output position
    :return: the rescaled sequence, or a ``(rescaled sequence, scaling path)`` pair when
        `output_scaling_path` is set. When the stripped sequence already has `desired_length`
        points it is returned as is (with ``range(desired_length)`` as the scaling path)
    :raises ValueError: if the stripped sequence is empty or longer than `desired_length`
    """
    sequence = _strip_nans(sequence)
    current_len = len(sequence)

    if current_len == 0:
        raise ValueError('Empty sequence cannot be extended')
    if desired_length < current_len:
        raise ValueError(
            'Desired length is smaller than current length: {0} < {1}'.format(
                desired_length, current_len))
    if desired_length == current_len:
        # Nothing to stretch: the scaling path is the identity mapping
        if output_scaling_path:
            return sequence, range(desired_length)
        return sequence

    scaling_factor = float(current_len) / desired_length

    # Input index feeding each output position
    source_indices = [
        int(floor(position * scaling_factor))
        for position in range(desired_length)
    ]
    rescaled_sequence = [sequence[index] for index in source_indices]

    if output_scaling_path:
        return rescaled_sequence, np.asarray(source_indices)
    return rescaled_sequence

示例#6

0

显示文件

文件： transformations.py 项目： lukauskas/dgw

def dtw_path_averaging(sequence_a,
                       sequence_b,
                       weight_a=1,
                       weight_b=1,
                       path=None,
                       shrink=True,
                       dtw_function=dtw_std):
    """
    Compute the weighted average of two sequences along their DTW alignment path.

    Every pair of aligned indices ``(i, j)`` on the path contributes one point:
    ``(sequence_a[i] * weight_a + sequence_b[j] * weight_b) / (weight_a + weight_b)``.

    :param sequence_a: first sequence to be averaged (converted to a float array)
    :param sequence_b: second sequence to be averaged (converted to a float array)
    :param weight_a: weight of the first sequence
    :param weight_b: weight of the second sequence
    :param path: pair ``(indices_into_a, indices_into_b)`` describing the alignment.
        Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the averaged path is shrunk with `uniform_shrinking_to_length`
        to the length of the longer input sequence (lengths measured after `_strip_nans`)
    :param dtw_function: function that computes the DTW path between two sequences. It is called as
        ``dtw_function(sequence_a, sequence_b, dist_only=False)`` and must return
        ``(distance, cost, path)``, e.g. see `parametrised_dtw_wrapper`
    :return: the averaged sequence as a numpy array
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        # Only the alignment is needed here; distance and cost matrix are discarded
        _, _, path = dtw_function(sequence_a, sequence_b, dist_only=False)

    indices_a, indices_b = path

    averaged_points = []
    for idx_a, idx_b in zip(indices_a, indices_b):
        weighted_sum = (sequence_a[idx_a] * weight_a +
                        sequence_b[idx_b] * weight_b)
        averaged_points.append(weighted_sum / (weight_a + weight_b))
    averaged_path = np.array(averaged_points)

    if not shrink:
        return averaged_path

    target_length = max(len(_strip_nans(sequence_a)),
                        len(_strip_nans(sequence_b)))
    return uniform_shrinking_to_length(averaged_path, target_length)

示例#7

0

显示文件

文件： scaling.py 项目： lukauskas/dgw

def uniform_shrinking_to_length(sequence, desired_length):
    """
    Shrink a sequence to `desired_length` points by averaging consecutive windows of the input.

    Output point ``i`` is the weighted mean of the input over the window
    ``[i * shrink_factor, (i + 1) * shrink_factor)`` where ``shrink_factor`` is
    ``current_length / desired_length``. Input points that are only partially covered by the
    window contribute proportionally to the covered fraction.

    :param sequence: sequence to shrink; converted to a float array and passed through `_strip_nans`
    :param desired_length: number of points in the output
    :return: the stripped sequence itself when it already has `desired_length` points, otherwise a new
        array with `desired_length` points (keeping a second axis when the input has a non-empty one)
    :raises ValueError: if the stripped sequence is empty, shorter than `desired_length`,
        or if `desired_length` is not positive
    """
    EPSILON = 1e-6

    sequence = np.asarray(sequence, dtype=float)
    sequence = _strip_nans(sequence)

    current_length = len(sequence)

    if current_length == 0:
        raise ValueError('Cannot shrink sequence of length 0')
    elif current_length < desired_length:
        raise ValueError('Desired length greater than current length: {0} > {1}'.format(desired_length, current_length))
    elif current_length == desired_length:
        return sequence

    if desired_length <= 0:
        raise ValueError('Invalid length desired: {0}'.format(desired_length))

    # This is essentially how many points in the current sequence will be mapped to a single point in the new one
    shrink_factor = float(current_length) / desired_length

    # One-dimensional input has no shape[1]
    try:
        ndim = sequence.shape[1]
    except IndexError:
        ndim = 0
    if ndim == 0:
        new_sequence = np.empty(desired_length)
    else:
        new_sequence = np.empty((desired_length, ndim))

    for i in range(desired_length):
        # Window of the input (in fractional index units) that maps to output point i
        start = i * shrink_factor
        end = (i + 1) * shrink_factor

        s = 0  # weighted sum of the input points in the window
        d = 0  # total weight accumulated; should add up to shrink_factor

        left_bound = int(floor(start))

        # A window starting (almost) exactly on an input point covers that point fully,
        # otherwise only the fraction between `start` and the next integer index is covered
        if fabs(start - left_bound) <= EPSILON:
            left_bound_input = 1
        else:
            left_bound_input = ceil(start) - start

        if left_bound_input >= EPSILON:
            s += sequence[left_bound] * left_bound_input
            d += left_bound_input

        right_bound = int(floor(end))
        right_bound_input = end - floor(end)

        if right_bound_input >= EPSILON:  # Epsilon to prevent rounding errors interfering
            s += sequence[right_bound] * right_bound_input
            d += right_bound_input

        # Points strictly between the two bounds are fully covered.
        # `range` (not the Python-2-only `xrange`) keeps this consistent with the outer loop.
        for j in range(left_bound + 1, right_bound):
            s += sequence[j]
            d += 1.0

        # Sanity check: the weights used must add up to the window width
        assert(abs(d - shrink_factor) < 0.000001)

        new_sequence[i] = s / d

    return new_sequence

示例#8

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_single_dimension_with_strip(self):
     # Five real values followed by two NaNs; only the five values are expected back
     padded = np.array([1, 2, 3, 4, 5, np.nan, np.nan], dtype=float)
     expected = np.array([1, 2, 3, 4, 5], dtype=float)

     assert_array_equal(expected, _strip_nans(padded))

示例#9

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_more_dims_no_strip_needed(self):
     # Three dimensions over six time points with no NaN anywhere:
     # the result must compare equal to the input
     rows = [[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12],
             [13, 14, 15, 16, 17, 18]]
     data = np.array(rows, dtype=float).T

     assert_array_equal(data, _strip_nans(data))

示例#10

0

显示文件

def uniform_shrinking_to_length(sequence, desired_length):
    """
    Shrink a sequence to `desired_length` points by averaging consecutive windows of the input.

    Output point ``i`` is the weighted mean of the input over the window
    ``[i * shrink_factor, (i + 1) * shrink_factor)`` where ``shrink_factor`` is
    ``current_length / desired_length``. Input points that are only partially covered by the
    window contribute proportionally to the covered fraction.

    :param sequence: sequence to shrink; converted to a float array and passed through `_strip_nans`
    :param desired_length: number of points in the output
    :return: the stripped sequence itself when it already has `desired_length` points, otherwise a new
        array with `desired_length` points (keeping a second axis when the input has a non-empty one)
    :raises ValueError: if the stripped sequence is empty, shorter than `desired_length`,
        or if `desired_length` is not positive
    """
    EPSILON = 1e-6

    sequence = np.asarray(sequence, dtype=float)
    sequence = _strip_nans(sequence)

    current_length = len(sequence)

    if current_length == 0:
        raise ValueError('Cannot shrink sequence of length 0')
    elif current_length < desired_length:
        raise ValueError(
            'Desired length greater than current length: {0} > {1}'.format(
                desired_length, current_length))
    elif current_length == desired_length:
        return sequence

    if desired_length <= 0:
        raise ValueError('Invalid length desired: {0}'.format(desired_length))

    # This is essentially how many points in the current sequence will be mapped to a single point in the new one
    shrink_factor = float(current_length) / desired_length

    # One-dimensional input has no shape[1]
    try:
        ndim = sequence.shape[1]
    except IndexError:
        ndim = 0
    if ndim == 0:
        new_sequence = np.empty(desired_length)
    else:
        new_sequence = np.empty((desired_length, ndim))

    for i in range(desired_length):
        # Window of the input (in fractional index units) that maps to output point i
        start = i * shrink_factor
        end = (i + 1) * shrink_factor

        s = 0  # weighted sum of the input points in the window
        d = 0  # total weight accumulated; should add up to shrink_factor

        left_bound = int(floor(start))

        # A window starting (almost) exactly on an input point covers that point fully,
        # otherwise only the fraction between `start` and the next integer index is covered
        if fabs(start - left_bound) <= EPSILON:
            left_bound_input = 1
        else:
            left_bound_input = ceil(start) - start

        if left_bound_input >= EPSILON:
            s += sequence[left_bound] * left_bound_input
            d += left_bound_input

        right_bound = int(floor(end))
        right_bound_input = end - floor(end)

        if right_bound_input >= EPSILON:  # Epsilon to prevent rounding errors interfering
            s += sequence[right_bound] * right_bound_input
            d += right_bound_input

        # Points strictly between the two bounds are fully covered.
        # `range` (not the Python-2-only `xrange`) keeps this consistent with the outer loop.
        for j in range(left_bound + 1, right_bound):
            s += sequence[j]
            d += 1.0

        # Sanity check: the weights used must add up to the window width
        assert (abs(d - shrink_factor) < 0.000001)

        new_sequence[i] = s / d

    return new_sequence

示例#11

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_multi_dimension_with_strip(self):
     # Three dimensions over six time points; the last three time points are NaN in every dimension
     nan = np.nan
     padded_rows = [[1, 2, 3, nan, nan, nan],
                    [7, 8, 9, nan, nan, nan],
                    [13, 14, 15, nan, nan, nan]]
     expected_rows = [[1, 2, 3],
                      [7, 8, 9],
                      [13, 14, 15]]

     # Transposed so that time runs along the first axis
     padded = np.array(padded_rows, dtype=float).T
     expected = np.array(expected_rows, dtype=float).T

     assert_array_equal(expected, _strip_nans(padded))

示例#12

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_single_dimension_with_strip(self):
     # Five real values followed by two NaNs; only the five values are expected back
     padded = np.array([1, 2, 3, 4, 5, np.nan, np.nan], dtype=float)
     expected = np.array([1, 2, 3, 4, 5], dtype=float)

     assert_array_equal(expected, _strip_nans(padded))

示例#13

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_more_dims_no_strip_needed(self):
     # Three dimensions over six time points with no NaN anywhere:
     # the result must compare equal to the input
     rows = [[1, 2, 3, 4, 5, 6],
             [7, 8, 9, 10, 11, 12],
             [13, 14, 15, 16, 17, 18]]
     data = np.array(rows, dtype=float).T

     assert_array_equal(data, _strip_nans(data))

示例#14

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_single_dimension_no_strip_needed(self):
     # No NaN in the input: the result must compare equal to the input
     values = [1, 2, 3, 4, 5, 6]
     data = np.array(values, dtype=float)

     assert_array_equal(data, _strip_nans(data))

示例#15

0

显示文件

文件： transformations.py 项目： lukauskas/dgw

def sdtw_averaging(sequence_a,
                   sequence_b,
                   weight_a,
                   weight_b,
                   path=None,
                   shrink=True,
                   dtw_function=dtw_std):
    """
    Implements Scaled Dynamic Time Warping Path Averaging as described in [#Niennattrakul:2009ep]

    .. [#Niennattrakul:2009ep] Vit Niennattrakul and Chotirat Ann Ratanamahatana "Shape averaging under Time Warping",
       2009 6th International Conference on Electrical Engineering/Electronics, Computer, Telecommunications and Information Technology (ECTI-CON)

    Each aligned pair on the path yields one weighted-average point, which is repeated a number of
    times that depends on the direction of the step that reached it.

    :param sequence_a: sequence A
    :param sequence_b: sequence B
    :param weight_a: weight of sequence A. Used as a list repetition count, so it has to be an integer
    :param weight_b: weight of sequence B. Used as a list repetition count, so it has to be an integer
    :param path: computed mapped path between the sequences. Will be computed using `dtw_function` if not provided
    :param shrink: if set to true, the result is shrunk with `uniform_shrinking_to_length` to the length
        of the longer input sequence (lengths measured after `_strip_nans`)
    :param dtw_function: function that computes the DTW path between two sequences. It is called as
        ``dtw_function(sequence_a, sequence_b, dist_only=False)`` and must return ``(distance, cost, path)``
    :return: the averaged sequence as a float numpy array
    """
    sequence_a = np.asarray(sequence_a, dtype=float)
    sequence_b = np.asarray(sequence_b, dtype=float)

    if path is None:
        distance, cost, path = dtw_function(sequence_a,
                                            sequence_b,
                                            dist_only=False)

    # Rezip this for easier traversal. The builtin zip is used instead of itertools.izip:
    # for a single pass it yields the same pairs, and it also exists on Python 3.
    path = zip(path[0], path[1])

    averaged_path = []

    prev = None

    total_weight = weight_a + weight_b
    # The paper does not explicitly say how to round this
    diagonal_coefficient = int(total_weight / 2.0)
    for a, b in path:

        item = (weight_a * sequence_a[a] +
                weight_b * sequence_b[b]) / total_weight
        if prev is None:
            # The first point of the path is treated like a diagonal step
            extension_coefficient = diagonal_coefficient
        elif prev[0] == a:  # The path moved from (i,j-1) to (i,j)
            extension_coefficient = weight_a
        elif prev[1] == b:  # The path moved from (i-1,j) to (i,j)
            extension_coefficient = weight_b
        else:  # Path moved diagonally from (i-1,j-1) to (i,j)
            extension_coefficient = diagonal_coefficient

        averaged_path.extend([item] * extension_coefficient)
        prev = (a, b)

    averaged_path = np.asarray(averaged_path, dtype=float)
    if shrink:
        averaged_path = uniform_shrinking_to_length(
            averaged_path,
            max(len(_strip_nans(sequence_a)), len(_strip_nans(sequence_b))))
    return averaged_path

示例#16

0

显示文件

文件： test_distance.py 项目： lukauskas/dgw

 def test_single_dimension_no_strip_needed(self):
     # No NaN in the input: the result must compare equal to the input
     values = [1, 2, 3, 4, 5, 6]
     data = np.array(values, dtype=float)

     assert_array_equal(data, _strip_nans(data))

示例#17

0

显示文件

文件： distance.py 项目： lukauskas/dgw

def dtw_std(x,
            y,
            metric='sqeuclidean',
            dist_only=True,
            constraint=None,
            k=None,
            try_reverse=True,
            normalise=False,
            scale_first=False,
            *args,
            **kwargs):
    """
    Wrapper around MLPY's dtw_std that supports cleaning up of NaNs, and reversing of sequences.

    :param x: first sequence (converted to a float array and passed through `_strip_nans`)
    :param y: second sequence (converted to a float array and passed through `_strip_nans`)
    :param metric: dtw metric to use `sqeuclidean`, `euclidean` or `cosine`
    :param dist_only: return distance only
    :param constraint: constraint of dtw (try `None` or `'slanted_band'`)
    :param k: parameter k needed for slanted band constraint
    :param try_reverse: Will also try reversing one sequence and keep whichever alignment has the smaller distance
    :param normalise: If set to true, distance will be divided by the length of the longer sequence
    :param scale_first: If set to true, the shorter sequence will be scaled to the length of the longer sequence before DTW
    :param args: extra positional arguments forwarded to `mlpy_dtw_std`
    :param kwargs: extra keyword arguments forwarded to `mlpy_dtw_std`
    :return: the distance if `dist_only` is set, otherwise a ``(distance, cost, path)`` tuple
    """
    def _normalise(ans, max_len):
        # Divide by the longer sequence's length only when normalisation was requested
        if normalise:
            return ans / max_len
        else:
            return ans

    def _scaled_path(path, scaling_path, flip_paths):
        # Map indices into the stretched sequence back to indices of the sequence before scaling
        path_x = np.asarray([scaling_path[i] for i in path[0]])
        path_y = path[1]

        # x and y were swapped before scaling, so swap the path components back
        if flip_paths:
            path = (path_y, path_x)
        else:
            path = (path_x, path_y)

        return path

    def _reverse_path(path):
        # Mirror the indices around the largest index that appears on the path
        n = path.max()
        path = n - path
        return path

    # Builtin float: the `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x = _strip_nans(x)
    y = _strip_nans(y)

    # Taken before any scaling so that normalisation uses the original lengths
    max_len = max(len(x), len(y))
    if scale_first:
        # Make sure x is the sequence that gets stretched; remember whether the roles were swapped
        if len(x) >= len(y):
            x, y = y, x
            flip_paths = True
        else:
            flip_paths = False

        x, scaling_path = uniform_scaling_to_length(x,
                                                    len(y),
                                                    output_scaling_path=True)

    regular_ans = mlpy_dtw_std(x,
                               y,
                               metric=metric,
                               dist_only=dist_only,
                               constraint=constraint,
                               k=k,
                               *args,
                               **kwargs)
    if not try_reverse:
        if dist_only:
            return _normalise(regular_ans, max_len)
        else:
            dist, cost, path = regular_ans
            dist = _normalise(dist, max_len)

            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)

            return dist, cost, path
    else:
        reverse_ans = mlpy_dtw_std(reverse_sequence(x),
                                   y,
                                   metric=metric,
                                   dist_only=dist_only,
                                   constraint=constraint,
                                   k=k,
                                   *args,
                                   **kwargs)
        if dist_only:
            return _normalise(min(regular_ans, reverse_ans), max_len)
        elif reverse_ans[0] >= regular_ans[0]:
            # The non-reversed alignment is at least as good; ties go to it
            dist, cost, path = regular_ans
            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return _normalise(dist, max_len), cost, path
        else:  # dist_only = False and reverse_ans is smaller
            dist, cost, path = reverse_ans
            # Express the x indices in terms of the non-reversed sequence
            path_rev = (_reverse_path(path[0]), path[1])

            if scale_first:
                path_rev = _scaled_path(path_rev, scaling_path, flip_paths)

            # NOTE(review): presumably mirrors the cost matrix to undo the reversal of x;
            # confirm the flipped axis matches the layout of mlpy's cost matrix
            cost = np.fliplr(cost)
            return _normalise(dist, max_len), cost, path_rev

示例#18

0

显示文件

文件： distance.py 项目： lukauskas/dgw

def dtw_std(x, y, metric='sqeuclidean', dist_only=True, constraint=None, k=None, try_reverse=True, normalise=False,
            scale_first=False, *args, **kwargs):
    """
    Wrapper around MLPY's dtw_std that supports cleaning up of NaNs, and reversing of sequences.

    :param x: first sequence (converted to a float array and passed through `_strip_nans`)
    :param y: second sequence (converted to a float array and passed through `_strip_nans`)
    :param metric: dtw metric to use `sqeuclidean`, `euclidean` or `cosine`
    :param dist_only: return distance only
    :param constraint: constraint of dtw (try `None` or `'slanted_band'`)
    :param k: parameter k needed for slanted band constraint
    :param try_reverse: Will also try reversing one sequence and keep whichever alignment has the smaller distance
    :param normalise: If set to true, distance will be divided by the length of the longer sequence
    :param scale_first: If set to true, the shorter sequence will be scaled to the length of the longer sequence before DTW
    :param args: extra positional arguments forwarded to `mlpy_dtw_std`
    :param kwargs: extra keyword arguments forwarded to `mlpy_dtw_std`
    :return: the distance if `dist_only` is set, otherwise a ``(distance, cost, path)`` tuple
    """
    def _normalise(ans, max_len):
        # Divide by the longer sequence's length only when normalisation was requested
        if normalise:
            return ans / max_len
        else:
            return ans

    def _scaled_path(path, scaling_path, flip_paths):
        # Map indices into the stretched sequence back to indices of the sequence before scaling
        path_x = np.asarray([scaling_path[i] for i in path[0]])
        path_y = path[1]

        # x and y were swapped before scaling, so swap the path components back
        if flip_paths:
            path = (path_y, path_x)
        else:
            path = (path_x, path_y)

        return path

    def _reverse_path(path):
        # Mirror the indices around the largest index that appears on the path
        n = path.max()
        path = n - path
        return path


    # Builtin float: the `np.float` alias was deprecated in NumPy 1.20 and removed in 1.24
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x = _strip_nans(x)
    y = _strip_nans(y)

    # Taken before any scaling so that normalisation uses the original lengths
    max_len = max(len(x), len(y))
    if scale_first:
        # Make sure x is the sequence that gets stretched; remember whether the roles were swapped
        if len(x) >= len(y):
            x, y = y, x
            flip_paths = True
        else:
            flip_paths = False

        x, scaling_path = uniform_scaling_to_length(x, len(y), output_scaling_path=True)

    regular_ans = mlpy_dtw_std(x, y, metric=metric, dist_only=dist_only, constraint=constraint, k=k, *args, **kwargs)
    if not try_reverse:
        if dist_only:
            return _normalise(regular_ans, max_len)
        else:
            dist, cost, path = regular_ans
            dist = _normalise(dist, max_len)

            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)

            return dist, cost, path
    else:
        reverse_ans = mlpy_dtw_std(reverse_sequence(x), y, metric=metric, dist_only=dist_only, constraint=constraint, k=k, *args, **kwargs)
        if dist_only:
            return _normalise(min(regular_ans, reverse_ans), max_len)
        elif reverse_ans[0] >= regular_ans[0]:
            # The non-reversed alignment is at least as good; ties go to it
            dist, cost, path = regular_ans
            if scale_first:
                path = _scaled_path(path, scaling_path, flip_paths)
            return _normalise(dist, max_len), cost, path
        else:  # dist_only = False and reverse_ans is smaller
            dist, cost, path = reverse_ans
            # Express the x indices in terms of the non-reversed sequence
            path_rev = (_reverse_path(path[0]), path[1])

            if scale_first:
                path_rev = _scaled_path(path_rev, scaling_path, flip_paths)

            # NOTE(review): presumably mirrors the cost matrix to undo the reversal of x;
            # confirm the flipped axis matches the layout of mlpy's cost matrix
            cost = np.fliplr(cost)
            return _normalise(dist, max_len), cost, path_rev