def test_reflect_2d(self):
    """Check that reflected resampling keeps 2D samples inside the boundaries.

    For each reflection specification, samples are drawn from the same KDE
    both with and without reflection.  In every reflected dimension the
    unreflected samples must spill outside the boundary, while the reflected
    samples must all lie strictly inside it.
    """
    print("\n|Test_KDE_Resample:test_reflect_2d()|")
    # Fixed seed so the random draws (and hence the assertions) are reproducible.
    seed = 8067
    print(seed)
    np.random.seed(seed)

    NUM = 2000
    xx = np.random.uniform(0.0, 2.0, NUM)
    yy = np.random.normal(1.0, 1.5, NUM)
    # Truncate `yy` at 2.0, then refill back up to `NUM` points from the survivors.
    yy = yy[yy < 2.0]
    yy = np.concatenate([yy, np.random.choice(yy, NUM - yy.size)])
    data = [xx, yy]

    kde = kale.KDE(data)
    # One entry per dimension; `None` means "no reflection" on that side/dimension.
    reflections = [
        [[0.0, 2.0], [None, 2.0]],
        [[0.0, 2.0], None],
        [None, [None, 2.0]],
        None
    ]
    for jj, reflect in enumerate(reflections):
        # Both draws are made for every case, in this order, so the random
        # stream consumed by later cases does not depend on earlier ones.
        samps_ref = kde.resample(reflect=reflect)
        samps_nrm = kde.resample()
        if reflect is None:
            continue

        for ii, ref in enumerate(reflect):
            if ref is None:
                continue
            # Treat an open side as an infinite bound, without mutating `ref`.
            lo = -np.inf if ref[0] is None else ref[0]
            hi = np.inf if ref[1] is None else ref[1]
            print(jj, ii, [lo, hi])

            # kk == 0: unreflected samples; kk == 1: reflected samples.
            for kk, zz in enumerate([samps_nrm[ii], samps_ref[ii]]):
                inside = (lo < zz) & (zz < hi)
                outside = ((zz < lo) | (hi < zz))
                print("\tin : ", kk, np.all(inside), np.any(inside))
                print("\tout: ", kk, np.all(outside), np.any(outside))
                if kk == 0:
                    assert_false(np.all(inside))
                    assert_true(np.any(outside))
                else:
                    assert_true(np.all(inside))
                    assert_false(np.any(outside))

    return
def compare_scipy_1d(self, kernel):
    """Compare 1D `kale.KDE` densities against `scipy.stats.gaussian_kde`.

    Both estimators are built from the same data for each bandwidth method
    and evaluated on the same grid; the results must agree per `np.allclose`.

    Parameters
    ----------
    kernel : object
        Forwarded unchanged to `kale.KDE(..., kernel=kernel)`.

    """
    print("\n|Test_KDE_PDF:test_compare_scipy_1d()|")
    NUM = 100
    a1 = np.random.normal(6.0, 1.0, NUM // 2)
    a2 = np.random.lognormal(0, 0.5, size=NUM // 2)
    aa = np.concatenate([a1, a2])

    bins = utils.spacing([-1, 14.0], 'lin', 40)
    grid = utils.spacing(bins, 'lin', 3000)

    methods = ['scott', 0.04, 0.2, 0.8]
    classes = [
        lambda xx, bw: sp.stats.gaussian_kde(xx, bw_method=bw),
        lambda xx, bw: kale.KDE(xx, bandwidth=bw, kernel=kernel)
    ]
    for mm in methods:
        kde_list = []
        for cc in classes:
            # Build the estimator once, and only guard the attribute lookup:
            # an `AttributeError` raised *inside* `density` must not be
            # mistaken for "this estimator has no `density` method".
            kde = cc(aa, mm)
            try:
                density = kde.density
            except AttributeError:
                test = kde.pdf(grid)
            else:
                test = density(grid, probability=True)[1]
            kde_list.append(test)

        print("method: {}".format(mm))
        print("\t" + utils.stats_str(kde_list[0]))
        print("\t" + utils.stats_str(kde_list[1]))
        assert_true(np.allclose(kde_list[0], kde_list[1]))

    return
def reflect_2d(self, kernel):
    """Validate 2D KDE densities evaluated with reflecting boundaries.

    For each reflection specification the density is evaluated on a regular
    grid and checked for: positive values inside the boundaries, (near-)zero
    values in the `outside` mask, and a total probability close to one.

    Parameters
    ----------
    kernel : object
        Forwarded to `kale.KDE(..., kernel=kernel)`.  Its `_FINITE` attribute
        selects whether all (`'infinite'`) or only some interior grid points
        are required to have positive density.

    """
    print("\n|Test_KDE_PDF:test_reflect_2d()|")
    np.random.seed(124)
    NUM = 1000
    xx = np.random.uniform(0.0, 2.0, NUM)
    yy = np.random.normal(1.0, 1.0, NUM)
    # Truncate `yy` at 2.0 and refill to `NUM` points from the remaining values.
    yy = yy[yy < 2.0]
    yy = np.concatenate([yy, np.random.choice(yy, NUM-yy.size)])
    data = [xx, yy]

    # Evaluation grid: bin edges, bin centers and bin widths in each dimension.
    edges = [utils.spacing(vals, 'lin', 30) for vals in data]
    egrid = [utils.spacing(ee, 'lin', 100, stretch=0.5) for ee in edges]
    cgrid = [utils.midpoints(ee, 'lin') for ee in egrid]
    width = [np.diff(ee) for ee in egrid]
    mesh_x, mesh_y = np.meshgrid(*cgrid, indexing='ij')
    grid = np.vstack([mesh_x.ravel(), mesh_y.ravel()])
    # The histogram is only used for its shape, to un-flatten the density.
    hist, *_ = np.histogram2d(*data, bins=egrid, density=True)
    # Area of every grid cell, used to integrate the density.
    area = width[0][:, np.newaxis] * width[1][np.newaxis, :]

    kde = kale.KDE(data, kernel=kernel)
    if kernel._FINITE == 'infinite':
        inside_test_func = np.all
    else:
        inside_test_func = np.any

    reflections = [
        [[0.0, 2.0], [None, 2.0]],
        [[0.0, 2.0], None],
        [None, [None, 2.0]],
        None
    ]
    for jj, reflect in enumerate(reflections):
        pdf_1d = kde.density(grid, reflect=reflect, probability=True)[1]
        pdf = pdf_1d.reshape(hist.shape)

        inside = np.ones_like(pdf_1d, dtype=bool)
        if reflect is None:
            outside = np.zeros_like(pdf_1d, dtype=bool)
            per_dim = ()
        else:
            outside = np.ones_like(pdf_1d, dtype=bool)
            per_dim = reflect

        # NOTE(review): the masks are AND-ed across dimensions, so `outside`
        # only selects points beyond the bounds in *every* dimension (and is
        # empty when a dimension has no bounds) -- confirm this is intended.
        for dim, bounds in enumerate(per_dim):
            if bounds is None:
                bounds = [-np.inf, np.inf]
            # Open sides are filled in place, so the `reflect` printed below
            # shows infinities rather than `None` for those entries.
            if bounds[0] is None:
                bounds[0] = -np.inf
            if bounds[1] is None:
                bounds[1] = np.inf
            lo, hi = bounds
            coords = grid[dim]
            inside &= (lo < coords) & (coords < hi)
            outside &= (coords < lo) | (hi < coords)

        assert_true(inside_test_func(pdf_1d[inside] > 0.0))
        assert_true(np.allclose(pdf_1d[outside], 0.0))

        prob_tot = np.sum(pdf * area)
        print(jj, reflect, "prob_tot = {:.4e}".format(prob_tot))
        assert_true(np.isclose(prob_tot, 1.0, rtol=3e-2))

    return
def test_different_bws(self):
    """Check resampling from a 2D KDE that uses a different bandwidth per dimension.

    Samples drawn along each dimension of the 2D KDE are compared, with a
    two-sample KS test, against samples from a 1D KDE built on that
    dimension alone with the matching bandwidth.
    """
    print("\n|Test_KDE_Resample:test_different_bws()|")
    np.random.seed(9235)
    NUM = 1000
    a1 = np.random.normal(6.0, 1.0, NUM // 2)
    a2 = np.random.lognormal(0, 0.5, size=NUM // 2)
    aa = np.concatenate([a1, a2])
    bb = np.random.normal(3.0, 0.02, NUM) + aa / 100
    data = [aa, bb]

    bws = [0.5, 2.0]
    kde2d = kale.KDE(data, bandwidth=bws)
    kde1d = [kale.KDE(dd, bandwidth=ss) for dd, ss in zip(data, bws)]

    for ii, kde in enumerate(kde1d):
        # Draw order (1D first, then 2D) is part of the seed calibration below.
        samp_1d = kde.resample(NUM).squeeze()
        samp_2d = kde2d.resample(NUM)[ii]
        # Make sure the two distributions resemble each other
        _, pv = sp.stats.ks_2samp(samp_1d, samp_2d)
        # Calibrated to the above seed-value of `9235`
        print("{}, pv = {}".format(ii, pv))
        assert_true(pv > 0.05)

    return
def pdf_params_fixed_bandwidth(self, kernel):
    """Compare single-parameter densities of a 2D KDE against 1D KDEs.

    With a fixed scalar bandwidth, the density of the 2D KDE evaluated for
    one parameter (`params=par`) must match a 1D KDE built from that
    parameter's data alone, and both must integrate to roughly one.

    Parameters
    ----------
    kernel : object
        Forwarded unchanged to `kale.KDE(..., kernel=kernel)`.

    """
    print("\n|Test_KDE_PDF:pdf_params_fixed_bandwidth()|")
    np.random.seed(124)
    NUM = 1000
    bandwidth = 0.02

    def covariance(sigma, corr):
        """Return the 2x2 covariance matrix for std-devs `sigma` and correlation `corr`."""
        s2 = np.square(sigma)
        cc = corr * sigma[0] * sigma[1]
        return np.array([[s2[0], cc], [cc, s2[1]]])

    # Two bivariate-normal components: one strongly correlated, one uncorrelated.
    data = np.random.multivariate_normal([1.0, 2.0], covariance([2.5, 1.5], 0.9), NUM).T
    more = np.random.multivariate_normal([1.0, 6.0], covariance([2.5, 0.5], 0.0), NUM).T
    data = np.concatenate([data, more], axis=-1)

    kde = kale.KDE(data, bandwidth=bandwidth, kernel=kernel)

    edges = [utils.spacing(dd, 'lin', 200, stretch=0.1) for dd in data]
    cents = [utils.midpoints(ee, 'lin') for ee in edges]
    widths = [np.diff(ee) for ee in edges]

    for par in range(2):
        xx = cents[par]
        pdf_2d = kde.density(xx, params=par, probability=True)[1]
        kde_1d = kale.KDE(data[par, :], bandwidth=bandwidth, kernel=kernel)
        pdf_1d = kde_1d.density(xx, probability=True)[1]
        print(f"pdf_1d = {utils.stats_str(pdf_1d)}")
        print(f"pdf_2d = {utils.stats_str(pdf_2d)}")
        assert_true(np.allclose(pdf_2d, pdf_1d, rtol=1e-3))

        # Each density must integrate to (approximately) unity over the grid.
        for pdf in (pdf_2d, pdf_1d):
            tot = np.sum(pdf * widths[par])
            print("tot = {:.4e}".format(tot))
            assert_true(np.isclose(tot, 1.0, rtol=2e-2))

    return
def compare_scipy_2d(self, kernel):
    """Compare 2D `kale.KDE` densities against `scipy.stats.gaussian_kde`.

    Both estimators are built from the same two-parameter data for each
    bandwidth method and evaluated on the same grid of bin centers; the
    results must agree per `np.allclose`.

    Parameters
    ----------
    kernel : object
        Forwarded unchanged to `kale.KDE(..., kernel=kernel)`.

    """
    print("\n|Test_KDE_PDF:test_compare_scipy_2d()|")
    NUM = 1000
    a1 = np.random.normal(6.0, 1.0, NUM//2)
    a2 = np.random.lognormal(0, 0.5, size=NUM//2)
    aa = np.concatenate([a1, a2])
    bb = np.random.normal(3.0, 0.02, NUM) + aa/100
    data = [aa, bb]

    edges = [utils.spacing(dd, 'lin', 30, stretch=0.5) for dd in data]
    cents = [utils.midpoints(ee, 'lin') for ee in edges]
    xc, yc = np.meshgrid(*cents, indexing='ij')
    grid = np.vstack([xc.ravel(), yc.ravel()])

    methods = ['scott', 0.04, 0.2, 0.8]
    classes = [
        lambda xx, bw: sp.stats.gaussian_kde(xx, bw_method=bw),
        lambda xx, bw: kale.KDE(xx, bandwidth=bw, kernel=kernel)
    ]
    for mm in methods:
        kdes_list = []
        for cc in classes:
            # Build the estimator once, and only guard the attribute lookup:
            # an `AttributeError` raised *inside* `density` must not be
            # mistaken for "this estimator has no `density` method".
            kde = cc(data, mm)
            try:
                density = kde.density
            except AttributeError:
                test = kde.pdf(grid)
            else:
                test = density(grid, probability=True)[1]
            kdes_list.append(test.reshape(xc.shape).T)

        assert_true(np.allclose(kdes_list[0], kdes_list[1]))

    return
def test_log(self):
    """Check `utils.spacing` in `'log'` mode against precomputed values.

    The expected points run from the minimum to the maximum of the input
    values, and the same number of points is requested from `utils.spacing`.
    """
    print("\n|Test_Spacing:test_log()|")
    samples = [
        0.56979885, 0.06782166, 38.00982397, 0.76822742, 0.24328732,
        18.22846225, 7.22905804, 0.5140395, 0.97960639, 14.57931413
    ]
    expected = [
        0.06782166, 0.13701255, 0.27679121, 0.55917048, 1.12962989,
        2.28206553, 4.61020298, 9.31347996, 18.81498695, 38.00982397
    ]
    num_points = np.size(expected)
    result = utils.spacing(samples, 'log', num_points)
    assert_true(np.allclose(expected, result))
    return
def test_lin(self):
    """Check `utils.spacing` in `'lin'` mode against precomputed values.

    The expected points run from the minimum to the maximum of the input
    values, and the same number of points is requested from `utils.spacing`.
    """
    print("\n|Test_Spacing:test_lin()|")
    samples = [
        64.15474369, 30.23993491, 18.74843086, 90.36893423, 81.49347391,
        21.66373546, 26.36243961, 9.54536041, 33.48985127, 87.77429238
    ]
    expected = [
        9.54536041, 18.5257575, 27.5061546, 36.48655169, 45.46694878,
        54.44734587, 63.42774296, 72.40814005, 81.38853714, 90.36893423
    ]
    num_points = np.size(expected)
    result = utils.spacing(samples, 'lin', num_points)
    assert_true(np.allclose(expected, result))
    return
def reflect_1d(self, kernel):
    """Validate 1D KDE densities with and without reflecting boundaries.

    Uniform data on `EXTR` is evaluated on a grid extending past both ends.
    For every boundary setting the density must integrate to one; the values
    beyond the data range and the max/min ratio of the density are then
    checked according to which sides are reflected.

    Parameters
    ----------
    kernel : object
        Forwarded to `kale.KDE(..., kernel=kernel)`.  Its `_FINITE` attribute
        selects how strictly the un-reflected tails are checked.

    """
    print("\n|Test_KDE_PDF:reflect_1d()|")
    np.random.seed(124)
    NUM = 1000
    EXTR = [0.0, 2.0]
    aa = np.random.uniform(*EXTR, NUM)

    egrid = utils.spacing(aa, 'lin', 2000, stretch=0.5)
    cgrid = utils.midpoints(egrid, 'lin')
    delta = np.diff(egrid)
    # Grid points lying beyond the lower / upper end of the data range.
    below = cgrid < EXTR[0]
    above = cgrid > EXTR[1]

    # If the kernel's support is infinite, then all points outside of the boundaries should
    # be nonzero; if it's finite-supported, then only some of them (near the edges) will be.
    if kernel._FINITE == 'infinite':
        outside_test_func = np.all
    else:
        outside_test_func = np.any

    for bnd in (None, EXTR):
        kde = kale.KDE(aa, kernel=kernel)
        pdf = kde.density(cgrid, reflect=bnd, probability=True)[1]

        # Make sure unitarity is preserved
        tot = np.sum(pdf * delta)
        print("Boundary '{}', total = {:.4e}".format(bnd, tot))
        assert_true(np.isclose(tot, 1.0, rtol=1e-3))

        # Ratio of the largest to the smallest strictly-positive density value.
        positive = pdf[pdf > 0]
        ratio_extr = np.max(pdf) / np.min(positive)

        if bnd is None:
            # No reflection: non-zero PDF on both sides, and a large ratio of extrema
            assert_true(outside_test_func(pdf[below] > 0.0))
            assert_true(outside_test_func(pdf[above] > 0.0))
            assert_true(ratio_extr > 10.0)
        elif bnd[0] is None:
            # No lower-reflection: nonzero values below 0.0, nothing above 2.0
            assert_true(outside_test_func(pdf[below] > 0.0))
            assert_true(np.all(pdf[above] == 0.0))
        elif bnd[1] is None:
            # No upper-reflection: nothing below 0.0, nonzero values above 2.0
            assert_true(np.all(pdf[below] == 0.0))
            assert_true(outside_test_func(pdf[above] > 0.0))
        else:
            # Reflection on both sides: nothing outside, and a nearly flat PDF inside
            assert_true(np.all(pdf[below] == 0.0))
            assert_true(np.all(pdf[above] == 0.0))
            assert_true(ratio_extr < 2.0)

    return