def test_cosine_analytical(d, allclose):
    """Compare CosineSimilarity's pdf, cdf and ppf with a closed-form density.

    The reference density is ``(1 - x**2) ** ((d - 3) / 2)`` evaluated on a
    grid over the open interval (-1, 1) with spacing 0.0001. Three properties
    are checked, each to an absolute tolerance of 0.01:

    1. ``dist.pdf`` matches the reference density once both are normalized
       to sum to one over the grid.
    2. ``dist.cdf`` matches the normalized cumulative sum of the reference.
    3. ``dist.ppf`` applied to ``dist.cdf(x)`` recovers the grid ``x``.

    Parameters
    ----------
    d
        Value passed to ``CosineSimilarity`` and used in the exponent of the
        reference density.
    allclose
        Callable invoked as ``allclose(a, b, atol=...)``; its result is
        asserted to be truthy.
    """
    # Skip when scipy is unavailable (beta, betainc, betaincinv).
    pytest.importorskip("scipy")

    def unnormalized_density(values, dims):
        # Unnormalized CosineSimilarity distribution, derived by Eric H.
        exponent = (dims - 3) / 2.0
        return (1 - values * values) ** exponent

    # Start one step inside -1; np.arange excludes the stop value 1.
    step = 0.0001
    grid = np.arange(-1 + step, 1, step)

    dist = CosineSimilarity(d)

    expected_pdf = dist.pdf(grid)
    reference_pdf = unnormalized_density(grid, d)

    expected_cdf = dist.cdf(grid)
    reference_cdf = np.cumsum(reference_pdf) / np.sum(reference_pdf)

    # The pdfs must agree after each is normalized by its own sum.
    expected_pdf_normalized = expected_pdf / np.sum(expected_pdf)
    reference_pdf_normalized = reference_pdf / np.sum(reference_pdf)
    assert allclose(expected_pdf_normalized, reference_pdf_normalized, atol=0.01)

    # Accumulating the reference density must reproduce the cdf.
    assert allclose(expected_cdf, reference_cdf, atol=0.01)

    # The inverse cdf must map the cdf values back onto the grid.
    recovered_grid = dist.ppf(expected_cdf)
    assert allclose(recovered_grid, grid, atol=0.01)
def test_cosine_intercept(d, p, rng, allclose):
    """Check that CosineSimilarity.ppf yields intercepts with the right hit rate.

    Draws 500 samples from ``UniformHypersphere(surface=True)`` in ``d``
    dimensions and keeps the first coordinate of each. The threshold
    ``CosineSimilarity(d).ppf(1 - p)`` should then be met or exceeded by a
    fraction of those coordinates within 0.05 (absolute) of ``p``.

    Parameters
    ----------
    d
        Dimensionality passed to both the sampler and ``CosineSimilarity``.
    p
        Target probability that a sampled coordinate is at or above the
        computed intercept.
    rng
        Random number generator forwarded to ``sample``.
    allclose
        Callable invoked as ``allclose(a, b, atol=...)``; its result is
        asserted to be truthy.
    """
    # Skip when scipy is unavailable (betaincinv).
    pytest.importorskip("scipy")

    num_samples = 500

    sphere = UniformHypersphere(surface=True)
    similarity = CosineSimilarity(d)

    # Keep only the first coordinate of every sampled point.
    points = sphere.sample(num_samples, d, rng=rng)
    first_coords = points[:, 0]

    # Intercept chosen so that first_coords >= intercept with probability p.
    intercept = similarity.ppf(1 - p)

    hits = np.sum(first_coords >= intercept)
    observed_fraction = hits / float(num_samples)
    assert allclose(observed_fraction, p, atol=0.05)