def test_pdf_boundary_simple(self):
    """Compare boundary-corrected pdfs with a manually rescaled plain pdf.

    For a single training point sitting on either end of the ordinal
    range (0 or 3), the kernel built with ``fix_boundary=False`` is
    evaluated on all four values and divided by a weight computed from the
    values at the *other* test points; the result must agree with the
    kernel built with ``fix_boundary=True``.

    The final part checks the ``weights`` attribute of a Gaussian kernel
    with boundary fixing against the 68/95/99.7 % rule.
    """
    # Each case pairs the lone training value with the slice that selects
    # every test point except the training value itself.
    edge_cases = (
        (0, slice(1, None)),
        (3, slice(None, -1)),
    )
    for train_value, other_points in edge_cases:
        self.x_train = np.array([train_value])
        self.x_test = np.array([0, 1, 2, 3])
        for bandwidth in (1e-3, 1e-2):
            # note: for larger bandwidths, the pdf also needs to be
            # truncated as +1, which leads to something different than
            # the scaling computed here
            plain_kernel = hp_kernels.WangRyzinOrdinal(
                data=self.x_train,
                bandwidth=bandwidth,
                num_values=4,
                fix_boundary=False,
            )
            fixed_kernel = hp_kernels.WangRyzinOrdinal(
                data=self.x_train,
                bandwidth=bandwidth,
                num_values=4,
                fix_boundary=True,
            )
            plain_pdf = plain_kernel(self.x_test).squeeze()
            fixed_pdf = fixed_kernel(self.x_test).squeeze()
            scale = 1 - plain_pdf[other_points].sum()
            # third positional argument of np.allclose is rtol
            self.assertTrue(np.allclose(plain_pdf / scale, fixed_pdf, 1e-4))

    # simple test based on 68, 95, 99% rule
    # NOTE(review): the expected weights only make sense if the Gaussian
    # kernel's domain puts the boundary 1, 2 and 3 bandwidths away from
    # 0.5 -- confirm against hp_kernels.Gaussian.
    self.x_train = np.array([0.5])
    sigma_rule = (
        (0.5, 0.6827),
        (0.25, 0.9545),
        (1 / 6, 0.9973),
    )
    for bandwidth, covered_mass in sigma_rule:
        gaussian = hp_kernels.Gaussian(
            data=self.x_train, bandwidth=bandwidth, fix_boundary=True
        )
        self.assertAlmostEqual(
            gaussian.weights[0], 1 / covered_mass, delta=1e-4
        )
def test_pdf_boundary_quadrature(self):
    """Check that the boundary-corrected pdf sums to one over all values.

    The kernel is evaluated on the four ordinal values 0..3, averaged
    along axis 0, and the resulting values are summed; the total must be
    within 1e-4 of 1 for every bandwidth tried.

    Reads ``self.x_train``, which is not set here (presumably provided by
    the test fixture -- confirm against ``setUp``).
    """
    self.x_test = np.array([0, 1, 2, 3])
    for bandwidth in (1e-2, 1e-1, 0.99):
        kernel = hp_kernels.WangRyzinOrdinal(
            data=self.x_train,
            bandwidth=bandwidth,
            num_values=4,
            fix_boundary=True,
        )
        averaged_pdf = kernel(self.x_test).mean(axis=0)
        total_mass = averaged_pdf.sum()
        self.assertAlmostEqual(total_mass, 1, delta=1e-4)
def test_values(self):
    """Compare kernel values with the ``sm_kernels.wang_ryzin`` reference.

    For each bandwidth the reference function is called once per test
    point with the training data as a column; the stacked results must
    match the transposed output of ``WangRyzinOrdinal`` (without boundary
    fixing) to a relative tolerance of 1e-4.

    Reads ``self.x_train`` and ``self.x_test`` (presumably provided by the
    test fixture -- confirm against ``setUp``).
    """
    for bandwidth in (1e-3, 1e-2, 1e-1, 1):
        reference_rows = []
        for point in self.x_test:
            reference_rows.append(
                sm_kernels.wang_ryzin(bandwidth, self.x_train[:, None], point)
            )
        reference = np.array(reference_rows)

        kernel = hp_kernels.WangRyzinOrdinal(
            data=self.x_train, bandwidth=bandwidth, fix_boundary=False
        )
        computed = kernel(self.x_test)
        # third positional argument of np.allclose is rtol
        self.assertTrue(np.allclose(computed.T, reference, 1e-4))
def test_sample(self):
    """Check that drawn samples follow the kernel's own pdf.

    For each bandwidth, ``2**20`` samples are drawn from a
    boundary-corrected ``WangRyzinOrdinal`` kernel and histogrammed into
    unit-width bins centred on the ordinal values 0..3. The empirical
    density of every bin must lie within 5e-2 of the kernel's pdf at the
    bin centre, averaged along axis 0.

    Reads ``self.x_train`` (presumably provided by the test fixture --
    confirm against ``setUp``).
    """
    num_samples = 2**20
    bin_edges = [-0.5, 0.5, 1.5, 2.5, 3.5]
    for bw in [1e-1, 5e-1, 0.99]:
        hp_kernel = hp_kernels.WangRyzinOrdinal(
            data=self.x_train, bandwidth=bw, num_values=4, fix_boundary=True
        )
        samples = hp_kernel.sample(num_samples=num_samples)
        # `normed` was removed from np.histogram in NumPy 1.24; `density`
        # is the supported keyword. All bins have width 1, so the density
        # equals the fraction of samples per bin and the values match what
        # `normed=True` produced.
        phat1, x = np.histogram(samples, density=True, bins=bin_edges)
        # evaluate the pdf at the bin centres (midpoints of adjacent edges)
        phat2 = hp_kernel((x[1:] + x[:-1]) / 2).mean(axis=0)
        for p1, p2 in zip(phat1, phat2):
            self.assertAlmostEqual(p1, p2, delta=5e-2)