Exemplo n.º 1
0
def test_cosine_analytical(d):
    """Compare CosineSimilarity's pdf, cdf and ppf to a numerical reference.

    The reference is the unnormalized density ``(1 - x**2)**((d - 3) / 2)``
    evaluated on a grid that starts at ``-1 + step`` and advances in
    increments of ``step`` towards 1. Each comparison uses an absolute
    tolerance of 0.01.
    """
    pytest.importorskip('scipy')  # beta, betainc, betaincinv

    step = 0.0001
    grid = np.arange(-1 + step, 1, step)

    # Unnormalized CosineSimilarity density (derivation credited to Eric H.).
    reference_pdf = (1 - grid * grid)**((d - 3) / 2.0)
    reference_mass = np.sum(reference_pdf)

    cosine = dists.CosineSimilarity(d)
    dist_pdf = cosine.pdf(grid)
    dist_cdf = cosine.cdf(grid)

    # After normalizing both to unit sum over the grid, the pdfs must agree.
    assert np.allclose(dist_pdf / np.sum(dist_pdf),
                       reference_pdf / reference_mass,
                       atol=0.01)

    # The running sum of the reference density must reproduce the cdf.
    reference_cdf = np.cumsum(reference_pdf) / reference_mass
    assert np.allclose(dist_cdf, reference_cdf, atol=0.01)

    # Feeding the cdf values through the inverse cdf must recover the grid.
    assert np.allclose(cosine.ppf(dist_cdf), grid, atol=0.01)
Exemplo n.º 2
0
def test_cosine_sample_shape(seed):
    """Tests that CosineSimilarity sample has correct shape.

    Sampling with shape ``(n, d)`` should give exactly the same values, in
    the same order, as sampling ``n * d`` values from an identically seeded
    random state.
    """
    n = 3
    d = 4
    dist = dists.CosineSimilarity(2)
    a = dist.sample(n, d, rng=np.random.RandomState(seed))
    b = dist.sample(n * d, rng=np.random.RandomState(seed))

    # Assert the shapes explicitly: comparing only the flattened values
    # would not notice a sample returned with the wrong layout.
    assert np.shape(a) == (n, d)
    assert np.shape(b) == (n * d,)

    assert np.allclose(a.flatten(), b)
Exemplo n.º 3
0
def test_cosine_intercept(d, p, rng):
    """Check that CosineSimilarity's inverse cdf yields usable intercepts.

    The intercept ``ppf(1 - p)`` is compared against the first coordinate of
    points sampled from ``UniformHypersphere(surface=True)`` in ``d``
    dimensions: the fraction of coordinates at or above the intercept must
    match ``p`` to within an absolute tolerance of 0.05.
    """
    pytest.importorskip('scipy')  # betaincinv

    n_points = 250
    tolerance = 0.05

    # Threshold such that a coordinate is >= threshold with probability p.
    threshold = dists.CosineSimilarity(d).ppf(1 - p)

    # Empirical values: first coordinate of each sampled surface point.
    sphere = dists.UniformHypersphere(surface=True)
    first_coords = sphere.sample(n_points, d, rng=rng)[:, 0]

    n_above = np.count_nonzero(first_coords >= threshold)
    fraction_above = n_above / float(n_points)
    assert np.allclose(fraction_above, p, atol=tolerance)
Exemplo n.º 4
0
def test_cosine_similarity(d, rng):
    """Tests CosineSimilarity sampling.

    Samples drawn from ``CosineSimilarity(d)`` are compared with the first
    coordinate of points sampled from ``UniformHypersphere(surface=True)``
    in ``d`` dimensions. Both samples are histogrammed over the same bins,
    and the per-bin difference, as a fraction of the sample count, must
    stay below 0.1.
    """
    num_samples = 2500
    num_bins = 5

    # Check that it gives a single dimension from UniformHypersphere
    exp_dist = dists.UniformHypersphere(surface=True)
    act_dist = dists.CosineSimilarity(d)

    exp = exp_dist.sample(num_samples, d, rng=rng)[:, 0]
    act = act_dist.sample(num_samples, rng=rng)

    # Use one common range for both histograms. Letting np.histogram derive
    # the range from each sample separately yields different bin edges, so
    # the per-bin counts would not refer to the same intervals.
    bin_range = (min(np.min(exp), np.min(act)),
                 max(np.max(exp), np.max(act)))
    exp_hist, _ = np.histogram(exp, bins=num_bins, range=bin_range)
    act_hist, _ = np.histogram(act, bins=num_bins, range=bin_range)

    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # float dtype is the portable replacement.
    count_diff = np.asarray(exp_hist - act_hist, dtype=float)
    assert np.all(np.abs(count_diff / num_samples) < 0.1)