示例#1
0
def test_aampi_profile_index_match():
    # Verify that, after every streamed point, the distances reported by `aampi`
    # (`P_` / `left_P_`) agree with distances recomputed directly from the
    # nearest-neighbour indices it reports (`I_` / `left_I_`).
    T_full = np.random.rand(64)
    m = 3
    T_full_subseq = core.rolling_window(T_full, m)
    warm_start = 8

    T_stream = T_full[:warm_start].copy()
    stream = aampi(T_stream, m, egress=True)
    P = np.full(stream.P_.shape, np.inf)
    left_P = np.full(stream.left_P_.shape, np.inf)

    def recompute(out, nn_indices, shift):
        # Entries without a neighbour (negative index) stay at +inf; all others
        # are recomputed from the subsequences of the full series. `shift` maps
        # a position in the streaming window to its position in `T_full`.
        out[:] = np.inf
        valid = np.argwhere(nn_indices >= 0).flatten()
        out[valid] = naive.distance(
            T_full_subseq[valid + shift],
            T_full_subseq[nn_indices[valid]],
            axis=1,
        )

    # `n` counts how many points have been streamed before the current one.
    for n, t in enumerate(T_full[len(T_stream):]):
        stream.update(t)

        recompute(P, stream.I_, n + 1)
        recompute(left_P, stream.left_I_, n + 1)

        npt.assert_almost_equal(stream.P_, P)
        npt.assert_almost_equal(stream.left_P_, left_P)
示例#2
0
def test_motifs_two_motifs():
    # Fix seed, because in some cases motifs can be off by an index, making the test
    # fail; this happens because one of the motifs is not repeated perfectly in T.
    np.random.seed(1234)

    # Everything after seeding runs inside try/finally so that the global RNG is
    # re-seeded even when an assertion fails; otherwise a failure here would leave
    # the fixed seed in place for whatever runs next.
    try:
        # The time series is random noise with two motifs for m=20:
        # * (almost) identical step functions at indices 10, 110 and 210
        # * identical linear slopes at indices 70 and 170
        T = np.random.normal(size=300)
        m = 20

        T[10:30] = 1
        T[12:28] = 2

        T[110:130] = 3
        T[112:128] = 6
        T[120] = 6.6

        T[210:230] = 1
        T[212:228] = 2
        T[220] = 1.9
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[110:130])) = 0.47
        # naive.distance(naive.z_norm(T[10:30]), naive.z_norm(T[210:230])) = 0.24
        # naive.distance(naive.z_norm(T[110:130]), naive.z_norm(T[210:230])) = 0.72
        # Hence T[10:30] is the motif representative for this motif

        T[70:90] = np.arange(m) * 0.1
        T[170:190] = np.arange(m) * 0.1
        # naive.distance(naive.z_norm(T[70:90]), naive.z_norm(T[170:190])) = 0.0

        max_motifs = 2

        mp = naive.stump(T, m)

        # left_indices = [[70, 170, -1], [10, 210, 110]]
        # The slope motif has only two occurrences, so its third slot is expected
        # to be NaN (presumably padding to the width of the longest motif row).
        left_profile_values = [
            [0.0, 0.0, np.nan],
            [
                0.0,
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[210:230])),
                naive.distance(core.z_norm(T[10:30]), core.z_norm(T[110:130])),
            ],
        ]

        right_distance_values, right_indices = motifs(
            T,
            mp[:, 0],
            max_motifs=max_motifs,
            max_distance=0.5,
            cutoff=np.inf,
        )

        # We ignore indices because of sorting ambiguities for equal distances.
        # As long as the distances are correct, the indices will be too.
        npt.assert_almost_equal(
            left_profile_values, right_distance_values, decimal=6
        )
    finally:
        # Reset seed
        np.random.seed(None)
示例#3
0
def test_naive_match_exclusion_zone():
    # The query appears as a perfect match at location 1 and as very close matches
    # at locations 0, 5 and 9 (after z-normalization).
    # However, since we apply an exclusion zone, the match at index 0 is ignored.
    T = np.array(
        [0.1, 1.0, 2.0, 3.0, -1.0, 0.1, 1.0, 2.0, -0.5, 0.2, 2.0, 4.0]
    )
    Q = np.array([0.0, 1.0, 2.0])
    m = Q.shape[0]
    excl_zone = int(np.ceil(m / 4))

    # Each expected entry is a [distance, index] pair, listed by ascending index.
    expected = [[0, 1]]
    for start in (5, 9):
        window = T[start:start + m]
        expected.append(
            [naive.distance(core.z_norm(Q), core.z_norm(window)), start]
        )

    # Order the matches by index first and by distance second, so the comparison
    # does not depend on how near-identical distances happen to be ordered.
    found = sorted(
        naive_match(
            Q,
            T,
            excl_zone=excl_zone,
            max_distance=0.1,
        ),
        key=lambda match: (match[1], match[0]),
    )

    npt.assert_almost_equal(expected, found)
示例#4
0
def test_motifs_max_matches():
    # This test covers the following:

    # A time series contains motif A at four locations and motif B at two.
    # With `max_matches=2` (and `max_motifs=3`) the expected result holds only the
    # top two matches of motif A and the top two matches of motif B, as two
    # separate motifs.
    T = np.array(
        [
            0.0, 1.0, 0.0,  # motif A (index 0)
            2.3,
            -1.0, -1.0, -2.0,  # motif B (index 4)
            0.0, 1.0, 0.0,  # motif A (index 7)
            -2.0,
            -1.0, -1.03, -2.0,  # motif B (index 11)
            -0.5,
            2.0, 3.0, 2.04,  # motif A (index 15)
            2.3,
            2.0, 3.0, 2.02,  # motif A (index 19)
        ]
    )
    m = 3
    max_motifs = 3

    left_indices = [[0, 7], [4, 11]]

    # Motif A's two best occurrences are identical (distance 0.0); motif B's second
    # occurrence differs slightly, so its distance is computed explicitly.
    b_first, b_second = left_indices[1]
    b_distance = naive.distance(
        core.z_norm(T[b_first:b_first + m]),
        core.z_norm(T[b_second:b_second + m]),
    )
    left_profile_values = [
        [0.0, 0.0],
        [0.0, b_distance],
    ]

    mp = naive.stump(T, m)
    right_distance_values, right_indices = motifs(
        T,
        mp[:, 0],
        max_motifs=max_motifs,
        max_distance=0.1,
        cutoff=np.inf,
        max_matches=2,
    )

    # We ignore indices because of sorting ambiguities for equal distances.
    # As long as the distances are correct, the indices will be too.
    npt.assert_almost_equal(
        left_profile_values, right_distance_values, decimal=4
    )
示例#5
0
def test_aamp_naive_match_exclusion_zone():
    # The query appears as a perfect match at location 3 and as very close matches
    # (only the first value differs: 0.1 instead of 0.0) at locations 0 and 7.
    # At index 11 the values are a scaled copy of the pattern, which is not
    # expected to match in the non-normalized (aamp) case.
    # However, since we apply an exclusion zone, the match at index 0 is ignored.
    T = np.array(
        [0.1, 1.0, 2.0, 0.0, 1.0, 2.0, -1.0, 0.1, 1.0, 2.0, -0.5, 0.2, 2.0, 4.0]
    )
    Q = np.array([0.0, 1.0, 2.0])
    m = Q.shape[0]
    # Extra large exclusion zone to exclude the first almost perfect match
    excl_zone = m

    # Each expected entry is a [distance, index] pair.
    near_match_start = 7
    expected = [
        [0, 3],
        [
            naive.distance(Q, T[near_match_start:near_match_start + m]),
            near_match_start,
        ],
    ]

    # To avoid sorting errors we first sort based on distance and then on indices
    found = sorted(
        naive_aamp_match(
            Q,
            T,
            excl_zone=excl_zone,
            max_distance=0.2,
        ),
        key=lambda match: (match[0], match[1]),
    )

    npt.assert_almost_equal(expected, found)
示例#6
0
def naive_idx_to_mp(I, T, m, normalize=True):
    """Recompute matrix profile distances from nearest-neighbour indices.

    Parameters
    ----------
    I : numpy.ndarray
        Nearest-neighbour index for each length-`m` subsequence of `T`. It is
        cast to int64; negative entries yield a distance of `np.inf`.
    T : numpy.ndarray
        Time series. It is copied, so the caller's array is not modified.
    m : int
        Window size passed to `core.rolling_window`.
    normalize : bool, default True
        When True, both subsequences are passed through `naive.z_norm` before
        the distance is computed; otherwise the raw values are compared.

    Returns
    -------
    numpy.ndarray
        Distance between every subsequence and the subsequence at its index in
        `I`, with `np.inf` wherever the subsequence contains a non-finite value
        or the index is negative.
    """
    nn_idx = I.astype(np.int64)

    # Remember which windows are fully finite before the data is patched.
    finite_mask = np.isfinite(T)
    window_is_finite = np.all(core.rolling_window(finite_mask, m), axis=1)

    # Replace non-finite values with zeros on a private copy so the distance
    # computation below runs on finite numbers; affected rows are overwritten
    # with inf afterwards.
    T_clean = T.copy()
    T_clean[~finite_mask] = 0.0

    windows = core.rolling_window(T_clean, m)
    neighbours = windows[nn_idx]

    if normalize:
        lhs = naive.z_norm(windows, axis=1)
        rhs = naive.z_norm(neighbours, axis=1)
    else:
        lhs, rhs = windows, neighbours
    P = naive.distance(lhs, rhs, axis=1)

    # Invalidate rows with non-finite input or without a neighbour.
    P[~window_is_finite | (nn_idx < 0)] = np.inf

    return P