示例#1
0
def test_stamp_A_B_join(T_A, T_B):
    """Check `stamp.stamp(T_A, T_B, m)` against the `utils.naive_stamp` reference.

    Only the first two columns of the reference are compared with the result.
    """
    m = 3
    expected = utils.naive_stamp(T_A, m, T_B=T_B)
    actual = stamp.stamp(T_A, T_B, m)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)
示例#2
0
def test_stump_one_constant_subsequence_A_B_join():
    """Compare `stump` with per-window `utils.naive_mass` on a piecewise-constant T_B.

    Three variants are checked: NumPy inputs, the same inputs wrapped in
    `pd.Series`, and the two series with their roles swapped. Only column 0
    is asserted; the index columns are not compared.
    """
    T_A = np.random.rand(20)
    T_B = np.concatenate(
        (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3

    naive_rows = [
        utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stump(T_A, m, T_B, ignore_trivial=False)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped

    # Same data passed as pandas Series; the reference is reused as-is
    actual = stump(pd.Series(T_A), m, pd.Series(T_B), ignore_trivial=False)
    utils.replace_inf(actual)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped

    # Roles of the two series exchanged
    naive_rows = [
        utils.naive_mass(Q, T_B, m) for Q in core.rolling_window(T_A, m)
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stump(T_B, m, T_A, ignore_trivial=False)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped
示例#3
0
def test_scrump_plus_plus_self_join(T_A, T_B, percentages):
    """Check `scrump(..., pre_scrump=True)` on T_B against a naive reference.

    For every `s` in 1..zone and every percentage, the reference is the
    `naive_scrump` output with rows overwritten wherever `naive_prescrump`
    produced a smaller column-0 value. NumPy's RNG is reseeded with the same
    seed before building the reference and before calling `scrump`.
    `T_A` is accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    for s in range(1, zone + 1):
        for percentage in percentages:
            seed = np.random.randint(100000)

            # Reference: naive prescrump merged into naive scrump
            np.random.seed(seed)
            pre_P, pre_I = naive_prescrump(
                T_B, m, T_B, s=s, exclusion_zone=zone)
            expected = naive_scrump(T_B, m, T_B, percentage, zone, True, s)
            for row in range(expected.shape[0]):
                if not pre_P[row] < expected[row, 0]:
                    continue
                expected[row, 0] = pre_P[row]
                expected[row, 1] = pre_I[row]

            # Implementation under test, replayed from the same seed
            np.random.seed(seed)
            approx_gen = scrump(
                T_B,
                m,
                ignore_trivial=True,
                percentage=percentage,
                pre_scrump=True,
                s=s,
            )
            actual = next(approx_gen)

            for result in (expected, actual):
                utils.replace_inf(result)
            npt.assert_almost_equal(expected, actual)
示例#4
0
def test_scrump_constant_subsequence_self_join(percentages):
    """Check `scrump` against `naive_scrump` on a series of 20 zeros then 5 ones.

    For each percentage, NumPy's RNG is reseeded with the same seed before the
    naive reference and before `scrump` (called with `pre_scrump=False`).
    """
    zeros = np.zeros(20, dtype=np.float64)
    ones = np.ones(5, dtype=np.float64)
    T = np.concatenate((zeros, ones))

    m = 3
    zone = int(np.ceil(m / 4))

    for percentage in percentages:
        seed = np.random.randint(100000)

        np.random.seed(seed)
        expected = naive_scrump(T, m, T, percentage, zone, False, None)

        np.random.seed(seed)
        approx_gen = scrump(
            T, m, ignore_trivial=True, percentage=percentage, pre_scrump=False)
        actual = next(approx_gen)

        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#5
0
def test_stomp_A_B_join(T_A, T_B):
    """Check `stomp._stomp` (with `ignore_trivial=False`) against `utils.naive_stamp`."""
    m = 3
    expected = utils.naive_stamp(T_A, m, T_B=T_B)
    actual = stomp._stomp(T_A, m, T_B, ignore_trivial=False)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected, actual)
示例#6
0
def test_scrump_nan_inf_self_join(T_A, T_B, substitute, substitution_locations,
                                  percentages):
    """Check `scrump` against `naive_scrump` after injecting `substitute` into T_B.

    For each substitution location, a working copy of T_B is reset and a
    single element is replaced by `substitute`. For each percentage, the RNG
    is reseeded with the same seed before the reference and before `scrump`.
    `T_A` is accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))  # depends only on m

    T_work = T_B.copy()

    for location in substitution_locations:
        # Restore the original values before injecting the substitute
        T_work[:] = T_B[:]
        T_work[location] = substitute

        for percentage in percentages:
            seed = np.random.randint(100000)

            np.random.seed(seed)
            expected = naive_scrump(
                T_work, m, T_work, percentage, zone, False, None)

            np.random.seed(seed)
            approx_gen = scrump(
                T_work, m, percentage=percentage, pre_scrump=False)
            actual = next(approx_gen)

            for result in (expected, actual):
                utils.replace_inf(result)
            npt.assert_almost_equal(expected, actual)
示例#7
0
def test_stamp_nan_inf_A_B_join(T_A, T_B, substitute_A, substitute_B,
                                substitution_locations):
    """Check `stamp.stamp` against per-window `utils.naive_mass` with injected values.

    Every pair of substitution locations is tried (location for T_B in the
    outer loop, location for T_A in the inner loop). Only the first two
    columns of the reference are compared.
    """
    m = 3

    A_work = T_A.copy()
    B_work = T_B.copy()

    for loc_B in substitution_locations:
        for loc_A in substitution_locations:
            # Reset both working copies, then inject one substitute into each
            A_work[:] = T_A[:]
            B_work[:] = T_B[:]
            A_work[loc_A] = substitute_A
            B_work[loc_B] = substitute_B

            naive_rows = [
                utils.naive_mass(Q, A_work, m)
                for Q in core.rolling_window(B_work, m)
            ]
            expected = np.array(naive_rows, dtype=object)
            actual = stamp.stamp(A_work, B_work, m)
            for result in (expected, actual):
                utils.replace_inf(result)
            npt.assert_almost_equal(expected[:, :2], actual)
示例#8
0
def test_stamp_self_join(T_A, T_B):
    """Check `stamp.stamp(T_B, T_B, m, ignore_trivial=True)` against `utils.naive_stamp`.

    The reference uses an exclusion zone of ceil(m / 2); only its first two
    columns are compared. `T_A` is accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 2))
    expected = utils.naive_stamp(T_B, m, exclusion_zone=zone)
    actual = stamp.stamp(T_B, T_B, m, ignore_trivial=True)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)
示例#9
0
def test_stumped_A_B_join(T_A, T_B, dask_cluster):
    """Check `stumped` (with `ignore_trivial=False`) against `utils.naive_stamp`.

    The computation runs inside a `Client` context opened on `dask_cluster`.
    """
    m = 3
    with Client(dask_cluster) as dask_client:
        expected = utils.naive_stamp(T_A, m, T_B=T_B)
        actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#10
0
def test_stumped_self_join_df(T_A, T_B, dask_cluster):
    """Check `stumped` on `pd.Series(T_B)` against `utils.naive_stamp` on T_B.

    The reference uses an exclusion zone of ceil(m / 4). The computation runs
    inside a `Client` context opened on `dask_cluster`. `T_A` is accepted but
    not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))
    with Client(dask_cluster) as dask_client:
        expected = utils.naive_stamp(T_B, m, exclusion_zone=zone)
        actual = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#11
0
def test_stumped_A_B_join(T_A, T_B, dask_client):
    """Check `stumped` (with `ignore_trivial=False`) against per-window `utils.naive_mass`.

    One reference row is built for each window of length `m` in T_B.
    """
    m = 3
    naive_rows = [
        utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected, actual)
示例#12
0
def test_stamp_A_B_join(T_A, T_B):
    """Check `stamp.stamp(T_A, T_B, m)` against per-window `utils.naive_mass`.

    One reference row is built for each window of length `m` in T_B; only the
    first two columns of the reference are compared.
    """
    m = 3
    naive_rows = [
        utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stamp.stamp(T_A, T_B, m)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)
示例#13
0
def test_stump_self_join_larger_window(T_A, T_B):
    """Check `stomp._stomp` on T_B against `utils.naive_stamp` for m in 8, 16, 32.

    Window sizes that are not strictly smaller than `len(T_B)` are skipped.
    `T_A` is accepted but not used.
    """
    for m in [8, 16, 32]:
        if len(T_B) <= m:
            continue

        zone = int(np.ceil(m / 4))
        expected = utils.naive_stamp(T_B, m, exclusion_zone=zone)
        actual = stomp._stomp(T_B, m, ignore_trivial=True)
        for result in (expected, actual):
            utils.replace_inf(result)

        npt.assert_almost_equal(expected, actual)
示例#14
0
def test_stamp_nan_zero_mean_self_join():
    """Check `stamp.stamp` against `utils.naive_stamp` on a fixed 7-point series.

    The series is symmetric around a central `np.inf`. The reference uses an
    exclusion zone of ceil(m / 2); only its first two columns are compared.
    """
    m = 3
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])

    zone = int(np.ceil(m / 2))
    expected = utils.naive_stamp(T, m, exclusion_zone=zone)
    actual = stamp.stamp(T, T, m, ignore_trivial=True)

    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)
def test_one_constant_subsequence_A_B_join(dask_cluster):
    """Check `stumped` against `utils.naive_stamp` when T_B is piecewise constant.

    T_A is 20 random values; T_B is 20 zeros followed by 5 ones. Only column 0
    is asserted; the index columns are not compared. The computation runs
    inside a `Client` context opened on `dask_cluster`.
    """
    with Client(dask_cluster) as dask_client:
        T_A = np.random.rand(20)
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_B = np.concatenate((zeros, ones))
        m = 3
        expected = utils.naive_stamp(T_A, m, T_B=T_B)
        actual = stumped(dask_client, T_A, m, T_B, ignore_trivial=False)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped
def test_one_constant_subsequence_self_join_df(dask_cluster):
    """Check `stumped` on a `pd.Series` of 20 zeros then 5 ones against `utils.naive_stamp`.

    The reference uses an exclusion zone of ceil(m / 4). Only column 0 is
    asserted; the index columns are not compared. The computation runs inside
    a `Client` context opened on `dask_cluster`.
    """
    with Client(dask_cluster) as dask_client:
        zeros = np.zeros(20, dtype=np.float64)
        ones = np.ones(5, dtype=np.float64)
        T_A = np.concatenate((zeros, ones))
        m = 3
        zone = int(np.ceil(m / 4))
        expected = utils.naive_stamp(T_A, m, exclusion_zone=zone)
        actual = stumped(dask_client, pd.Series(T_A), m, ignore_trivial=True)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped
示例#17
0
def test_stumped_nan_zero_mean_self_join(dask_cluster):
    """Check `stumped` against `utils.naive_stamp` on a fixed 7-point series.

    The series is symmetric around a central `np.inf`. The reference uses an
    exclusion zone of ceil(m / 4). The computation runs inside a `Client`
    context opened on `dask_cluster`.
    """
    with Client(dask_cluster) as dask_client:
        m = 3
        T = np.array([-1, 0, 1, np.inf, 1, 0, -1])

        zone = int(np.ceil(m / 4))
        expected = utils.naive_stamp(T, m, exclusion_zone=zone)
        actual = stumped(dask_client, T, m, ignore_trivial=True)

        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#18
0
def test_stumped_A_B_join_df(T_A, T_B, dask_cluster):
    """Check `stumped` on `pd.Series` inputs against per-window `utils.naive_mass`.

    The reference is built from the raw T_A / T_B arrays, one row per window
    of length `m` in T_B. The computation runs inside a `Client` context
    opened on `dask_cluster`.
    """
    m = 3
    with Client(dask_cluster) as dask_client:
        naive_rows = [
            utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B, m)
        ]
        expected = np.array(naive_rows, dtype=object)
        series_A = pd.Series(T_A)
        series_B = pd.Series(T_B)
        actual = stumped(dask_client, series_A, m, series_B, ignore_trivial=False)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#19
0
def test_two_constant_subsequences_A_B_join_swap(dask_client):
    """Check `stumped(dask_client, T_B, m, T_A)` against per-window `utils.naive_mass`.

    T_A is 10 zeros then 10 ones; T_B is 20 zeros then 5 ones. Reference rows
    come from the windows of T_A searched in T_B. Only column 0 is asserted;
    the index columns are not compared.
    """
    T_A = np.concatenate(
        (np.zeros(10, dtype=np.float64), np.ones(10, dtype=np.float64)))
    T_B = np.concatenate(
        (np.zeros(20, dtype=np.float64), np.ones(5, dtype=np.float64)))
    m = 3

    naive_rows = [
        utils.naive_mass(Q, T_B, m) for Q in core.rolling_window(T_A, m)
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stumped(dask_client, T_B, m, T_A, ignore_trivial=False)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped
示例#20
0
def test_stamp_self_join(T_A, T_B):
    """Check `stamp.stamp(T_B, T_B, m, ignore_trivial=True)` against `utils.naive_mass`.

    One reference row is built per window of T_B, passing the window index and
    an exclusion zone of ceil(m / 2). Only the first two columns of the
    reference are compared. `T_A` is accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 2))
    naive_rows = [
        utils.naive_mass(Q, T_B, m, i, zone, ignore_trivial=True)
        for i, Q in enumerate(core.rolling_window(T_B, m))
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stamp.stamp(T_B, T_B, m, ignore_trivial=True)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)
示例#21
0
def test_stumped_one_subsequence_inf_A_B_join(T_A, T_B,
                                              substitution_location_B,
                                              dask_cluster):
    """Check `stumped` against `utils.naive_stamp` with `np.inf` injected into T_B.

    A copy of T_B gets `np.inf` at `substitution_location_B`; T_B itself is
    left untouched. The computation runs inside a `Client` context opened on
    `dask_cluster`.
    """
    m = 3
    with Client(dask_cluster) as dask_client:
        T_B_inf = T_B.copy()
        T_B_inf[substitution_location_B] = np.inf

        expected = utils.naive_stamp(T_A, m, T_B=T_B_inf)
        actual = stumped(dask_client, T_A, m, T_B_inf, ignore_trivial=False)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#22
0
def test_stumped_self_join_df(T_A, T_B, dask_client):
    """Check `stumped` on `pd.Series(T_B)` against per-window `utils.naive_mass`.

    One reference row is built per window of T_B, passing the window index and
    an exclusion zone of ceil(m / 4). `T_A` is accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))
    naive_rows = [
        utils.naive_mass(Q, T_B, m, i, zone, True)
        for i, Q in enumerate(core.rolling_window(T_B, m))
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stumped(dask_client, pd.Series(T_B), m, ignore_trivial=True)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected, actual)
示例#23
0
def test_scrump_A_B_join_full(T_A, T_B):
    """Check `scrump` at `percentage=1.0` against `utils.naive_stamp`.

    The comparison is made twice: once with (T_A, T_B) and once with the
    roles of the two series exchanged. `scrump` is called with
    `ignore_trivial=False` and `pre_scrump=False`, and only the first two
    columns of the reference are compared with the first item yielded by the
    `scrump` generator.
    """
    m = 3

    # Second pair repeats the check with the roles changed
    for series, other in ((T_A, T_B), (T_B, T_A)):
        expected = utils.naive_stamp(series, m, T_B=other)

        approx_gen = scrump(
            series,
            m,
            other,
            ignore_trivial=False,
            percentage=1.0,
            pre_scrump=False,
        )
        actual = next(approx_gen)

        utils.replace_inf(expected)
        utils.replace_inf(actual)
        npt.assert_almost_equal(expected[:, :2], actual)
示例#24
0
def test_stumped_one_subsequence_nan_self_join(
    T_A, T_B, substitution_location_B, dask_cluster
):
    """Check `stumped` against `utils.naive_stamp` with `np.nan` injected into T_B.

    A copy of T_B gets `np.nan` at `substitution_location_B`. The reference
    uses an exclusion zone of ceil(m / 4). The computation runs inside a
    `Client` context opened on `dask_cluster`. `T_A` is accepted but not used.
    """
    m = 3
    with Client(dask_cluster) as dask_client:
        T_B_nan = T_B.copy()
        T_B_nan[substitution_location_B] = np.nan

        zone = int(np.ceil(m / 4))
        expected = utils.naive_stamp(T_B_nan, m, exclusion_zone=zone)
        actual = stumped(dask_client, T_B_nan, m, ignore_trivial=True)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected, actual)
示例#25
0
def test_stamp_nan_inf_self_join(T_A, T_B, substitute_B, substitution_locations):
    """Check `stamp.stamp` against `utils.naive_stamp` with `substitute_B` injected.

    For each substitution location, a working copy of T_B is reset and one
    element is replaced by `substitute_B`. The reference uses an exclusion
    zone of ceil(m / 2); only its first two columns are compared. `T_A` is
    accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 2))  # depends only on m

    T_work = T_B.copy()

    for location in substitution_locations:
        # Restore the original values before injecting the substitute
        T_work[:] = T_B[:]
        T_work[location] = substitute_B

        expected = utils.naive_stamp(T_work, m, exclusion_zone=zone)
        actual = stamp.stamp(T_work, T_work, m, ignore_trivial=True)
        for result in (expected, actual):
            utils.replace_inf(result)
        npt.assert_almost_equal(expected[:, :2], actual)
示例#26
0
def test_one_constant_subsequence_self_join_df(dask_client):
    """Check `stumped` on a `pd.Series` of 20 zeros then 5 ones against `utils.naive_mass`.

    One reference row is built per window, passing the window index and an
    exclusion zone of ceil(m / 4). Only column 0 is asserted; the index
    columns are not compared.
    """
    zeros = np.zeros(20, dtype=np.float64)
    ones = np.ones(5, dtype=np.float64)
    T_A = np.concatenate((zeros, ones))
    m = 3
    zone = int(np.ceil(m / 4))
    naive_rows = [
        utils.naive_mass(Q, T_A, m, i, zone, True)
        for i, Q in enumerate(core.rolling_window(T_A, m))
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stumped(dask_client, pd.Series(T_A), m, ignore_trivial=True)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, 0], actual[:, 0])  # indices skipped
示例#27
0
def test_stump_self_join_larger_window(T_A, T_B, dask_client):
    """Check `stumped` on T_B against per-window `utils.naive_mass` for m in 8, 16, 32.

    Window sizes that are not strictly smaller than `len(T_B)` are skipped.
    The reference passes the window index and an exclusion zone of
    ceil(m / 4). `T_A` is accepted but not used.
    """
    for m in [8, 16, 32]:
        if len(T_B) <= m:
            continue

        zone = int(np.ceil(m / 4))
        naive_rows = [
            utils.naive_mass(Q, T_B, m, i, zone, True)
            for i, Q in enumerate(core.rolling_window(T_B, m))
        ]
        expected = np.array(naive_rows, dtype=object)
        actual = stumped(dask_client, T_B, m, ignore_trivial=True)
        for result in (expected, actual):
            utils.replace_inf(result)

        npt.assert_almost_equal(expected, actual)
示例#28
0
def test_scrump_self_join_full(T_A, T_B):
    """Check `scrump` on T_B at `percentage=1.0` against `utils.naive_stamp`.

    The reference uses an exclusion zone of ceil(m / 4); only its first two
    columns are compared with the first item yielded by the `scrump`
    generator. `T_A` is accepted but not used.
    """
    m = 3
    zone = int(np.ceil(m / 4))

    expected = utils.naive_stamp(T_B, m, exclusion_zone=zone)

    approx_gen = scrump(
        T_B, m, ignore_trivial=True, percentage=1.0, pre_scrump=False)
    actual = next(approx_gen)

    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)
def test_stumped_one_subsequence_inf_A_B_join(
    T_A, T_B, substitution_location_B, dask_client
):
    """Check `stumped` against per-window `utils.naive_mass` with `np.inf` in T_B.

    A copy of T_B gets `np.inf` at `substitution_location_B`; one reference
    row is built for each window of length `m` in that copy.
    """
    m = 3

    T_B_inf = T_B.copy()
    T_B_inf[substitution_location_B] = np.inf

    naive_rows = [
        utils.naive_mass(Q, T_A, m) for Q in core.rolling_window(T_B_inf, m)
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stumped(dask_client, T_A, m, T_B_inf, ignore_trivial=False)
    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected, actual)
示例#30
0
def test_stamp_nan_zero_mean_self_join():
    """Check `stamp.stamp` against per-window `utils.naive_mass` on a fixed series.

    The 7-point series is symmetric around a central `np.inf`. One reference
    row is built per window, passing the window index and an exclusion zone
    of ceil(m / 2). Only the first two columns of the reference are compared.
    """
    m = 3
    T = np.array([-1, 0, 1, np.inf, 1, 0, -1])

    zone = int(np.ceil(m / 2))
    naive_rows = [
        utils.naive_mass(Q, T, m, i, zone, True)
        for i, Q in enumerate(core.rolling_window(T, m))
    ]
    expected = np.array(naive_rows, dtype=object)
    actual = stamp.stamp(T, T, m, ignore_trivial=True)

    for result in (expected, actual):
        utils.replace_inf(result)
    npt.assert_almost_equal(expected[:, :2], actual)