Exemplo n.º 1
0
def test_grad_metrics_match_metrics(spatial_data, spatial_distances):
    """Compare the gradient-aware metrics against reference distance matrices.

    Every name in ``dist.named_distances_with_gradients`` that also occurs in
    ``spatial_distances`` is handed to ``spatial_check`` with
    ``with_grad=True``.  The metrics that need extra arguments (seuclidean,
    weighted minkowski, mahalanobis) and hellinger are checked by hand:
    element ``[0]`` of each ``*_grad`` result is gathered for every ordered
    pair of rows and compared with the reference matrix.
    """
    n_rows = spatial_data.shape[0]
    n_cols = spatial_data.shape[1]

    def first_outputs(grad_fn, size, *extra, absolute=False, **kwargs):
        # Build the size x size grid of grad_fn(row_i, row_j, ...)[0],
        # visiting pairs row by row.
        grid = []
        for i in range(size):
            row = []
            for j in range(size):
                left, right = spatial_data[i], spatial_data[j]
                if absolute:
                    left, right = np.abs(left), np.abs(right)
                row.append(grad_fn(left, right, *extra, **kwargs)[0])
            grid.append(row)
        return np.array(grid)

    for name in dist.named_distances_with_gradients:
        if name not in spatial_distances:
            continue
        spatial_check(name, spatial_data, spatial_distances, with_grad=True)

    # The remaining metrics take extra arguments, so each gets its own check.

    # SEuclidean: one random non-negative value per column, passed as V.
    weights = np.abs(np.random.randn(n_cols))
    expected = pairwise_distances(spatial_data, metric="seuclidean", V=weights)
    actual = first_outputs(dist.standardised_euclidean_grad, n_rows, weights)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric seuclidean",
    )

    # Weighted minkowski: reuses the same per-column values as weights.
    expected = pairwise_distances(
        spatial_data, metric="wminkowski", w=weights, p=3
    )
    actual = first_outputs(dist.weighted_minkowski_grad, n_rows, weights, p=3)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric weighted_minkowski",
    )

    # Mahalanobis: a fresh random non-negative square matrix, passed as VI.
    vi_matrix = np.abs(np.random.randn(n_cols, n_cols))
    expected = pairwise_distances(
        spatial_data, metric="mahalanobis", VI=vi_matrix
    )
    actual = first_outputs(dist.mahalanobis_grad, n_rows, vi_matrix)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric mahalanobis",
    )

    # Hellinger: absolute values only, and the last two rows are left out.
    # The two operands are built separately on purpose so the callee never
    # receives the very same array object twice.
    expected = dist.pairwise_special_metric(
        np.abs(spatial_data[:-2]),
        np.abs(spatial_data[:-2]),
    )
    actual = first_outputs(dist.hellinger_grad, n_rows - 2, absolute=True)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric hellinger",
    )
Exemplo n.º 2
0
def test_grad_metrics_match_metrics():
    """Compare the gradient-aware metrics against reference distance matrices.

    ``spatial_data`` and ``spatial_distances`` are not parameters here; they
    are looked up in the enclosing scope.  For every name in
    ``dist.named_distances_with_gradients`` that is also in
    ``spatial_distances``, element ``[0]`` of the gradient function's result
    is gathered for every ordered pair of rows and compared with
    ``pairwise_distances``.  Seuclidean, weighted minkowski, mahalanobis and
    hellinger need extra arguments and are checked separately afterwards.
    """
    n_rows = spatial_data.shape[0]
    n_cols = spatial_data.shape[1]

    def first_outputs(grad_fn, size, *extra, absolute=False, **kwargs):
        # Build the size x size grid of grad_fn(row_i, row_j, ...)[0],
        # visiting pairs row by row.
        grid = []
        for i in range(size):
            row = []
            for j in range(size):
                left, right = spatial_data[i], spatial_data[j]
                if absolute:
                    left, right = np.abs(left), np.abs(right)
                row.append(grad_fn(left, right, *extra, **kwargs)[0])
            grid.append(row)
        return np.array(grid)

    for name in dist.named_distances_with_gradients:
        if name not in spatial_distances:
            continue

        expected = pairwise_distances(spatial_data, metric=name)
        # The reference can contain non-finite entries; overwrite them with
        # the value the gradient implementation is expected to give.
        if name == "braycurtis":
            expected[~np.isfinite(expected)] = 0.0
        elif name in ("cosine", "correlation"):
            expected[~np.isfinite(expected)] = 1.0
            # Distance between all-zero vectors should be zero.
            # NOTE(review): presumably rows 10 and 11 of spatial_data are the
            # all-zero rows -- confirm against the data definition.
            expected[10, 11] = 0.0
            expected[11, 10] = 0.0

        actual = first_outputs(dist.named_distances_with_gradients[name], n_rows)
        assert_array_almost_equal(
            actual,
            expected,
            err_msg="Distances with grad don't match for metric {}".format(name),
        )

    # The remaining metrics take extra arguments, so each gets its own check.

    # SEuclidean: one random non-negative value per column, passed as V.
    weights = np.abs(np.random.randn(n_cols))
    expected = pairwise_distances(spatial_data, metric="seuclidean", V=weights)
    actual = first_outputs(dist.standardised_euclidean_grad, n_rows, weights)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric seuclidean",
    )

    # Weighted minkowski: reuses the same per-column values as weights.
    expected = pairwise_distances(
        spatial_data, metric="wminkowski", w=weights, p=3
    )
    actual = first_outputs(dist.weighted_minkowski_grad, n_rows, weights, p=3)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric weighted_minkowski",
    )

    # Mahalanobis: a fresh random non-negative square matrix, passed as VI.
    vi_matrix = np.abs(np.random.randn(n_cols, n_cols))
    expected = pairwise_distances(
        spatial_data, metric="mahalanobis", VI=vi_matrix
    )
    actual = first_outputs(dist.mahalanobis_grad, n_rows, vi_matrix)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric mahalanobis",
    )

    # Hellinger: absolute values only, and the last two rows are left out.
    # The two operands are built separately on purpose so the callee never
    # receives the very same array object twice.
    expected = dist.pairwise_special_metric(
        np.abs(spatial_data[:-2]),
        np.abs(spatial_data[:-2]),
    )
    actual = first_outputs(dist.hellinger_grad, n_rows - 2, absolute=True)
    assert_array_almost_equal(
        actual,
        expected,
        err_msg="Distances don't match for metric hellinger",
    )
def test_chunked_parallel_special_metric_implementation_hellinger(
    spatial_data,
    stashed_previous_impl_for_regression_test,
):
    """Check ``dist.chunked_parallel_special_metric`` for hellinger.

    The current implementation is called with X only and with X and Y (both
    the absolute values of ``spatial_data`` minus its last two rows).  Each
    result must equal, exactly, a float32 matrix built from element ``[0]``
    of ``dist.hellinger_grad`` for every ordered pair of rows.  The same
    inputs, plus one pairing with an unrelated random Y, are then run through
    ``stashed_previous_impl_for_regression_test`` and the outputs must be
    identical to the current implementation's.
    """
    size = spatial_data.shape[0] - 2

    # Base tests that must pass!
    current_x = dist.chunked_parallel_special_metric(np.abs(spatial_data[:-2]))
    current_xy = dist.chunked_parallel_special_metric(
        np.abs(spatial_data[:-2]),
        np.abs(spatial_data[:-2]),
    )

    # Reference matrix from the pairwise gradient function, visited row by
    # row and cast to float32 before the exact comparison.
    rows = []
    for i in range(size):
        row = []
        for j in range(size):
            pair_result = dist.hellinger_grad(
                np.abs(spatial_data[i]), np.abs(spatial_data[j])
            )
            row.append(pair_result[0])
        rows.append(row)
    reference = np.array(rows).astype(np.float32)

    for current in (current_x, current_xy):
        assert_array_equal(
            reference,
            current,
            err_msg="Distances don't match for metric hellinger",
        )

    # Test to compare chunked_parallel different implementations
    stashed_x = stashed_previous_impl_for_regression_test(
        np.abs(spatial_data[:-2])
    )
    stashed_xy = stashed_previous_impl_for_regression_test(
        np.abs(spatial_data[:-2]),
        np.abs(spatial_data[:-2]),
    )

    assert_array_equal(
        stashed_xy,
        current_xy,
        err_msg="Distances don't match between stashed and current chunked_parallel implementations with X and Y!",
    )
    assert_array_equal(
        stashed_x,
        current_x,
        err_msg="Distances don't match between stashed and current chunked_parallel implementations with X only!",
    )

    # Hellinger on a different X and Y pair.
    # NOTE(review): the second operand is signed random data with a
    # hard-coded width of 20 -- presumably spatial_data has 20 columns, and
    # the missing np.abs may be unintended; confirm.
    other_points = np.random.randn(10, 20)
    stashed_mixed = stashed_previous_impl_for_regression_test(
        np.abs(spatial_data[:-2]), other_points
    )
    current_mixed = dist.chunked_parallel_special_metric(
        np.abs(spatial_data[:-2]), other_points
    )

    assert_array_equal(
        stashed_mixed,
        current_mixed,
        err_msg="Distances don't match between stashed and current chunked_parallel implementations",
    )