def job_me_opt(p, data_source, tr, te, r, J=5):
    """
    ME test of Jitkrittum et al., 2016 used as a goodness-of-fit test.
    Gaussian kernel. Optimize the test locations and the Gaussian width.
    """
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # options for optimizing the ME test
        op = {
            'n_test_locs': J,
            'seed': r + 5,
            'max_iter': 40,
            'batch_proportion': 1.0,
            'locs_step_size': 1.0,
            'gwidth_step_size': 0.1,
            'tol_fun': 1e-4,
            'reg': 1e-4,
        }
        # optimize on the training portion; test on the rest
        me_opt = tgof.GaussMETestOpt(p, n_locs=J, tr_proportion=tr_proportion,
                                     alpha=alpha, seed=r + 111)
        me_result = me_opt.perform_test(data, op)
    return {'test_result': me_result, 'time_secs': t.secs}
def perform_test(self, dat, return_simulated_stats=False):
    """
    dat: an instance of Data
    """
    with util.ContextTimer() as t:
        alpha = self.alpha
        null_sim = self.null_sim
        n_simulate = null_sim.n_simulate
        X = dat.data()
        n = X.shape[0]
        J = self.V.shape[0]

        nfssd, fea_tensor = self.compute_stat(dat, return_feature_tensor=True)
        sim_results = null_sim.simulate(self, dat, fea_tensor)
        arr_nfssd = sim_results['sim_stats']

        # approximate p-value with the simulated null statistics
        pvalue = np.mean(arr_nfssd > nfssd)

    results = {
        'alpha': self.alpha,
        'pvalue': pvalue,
        'test_stat': nfssd,
        'h0_rejected': pvalue < alpha,
        'n_simulate': n_simulate,
        'time_secs': t.secs,
    }
    if return_simulated_stats:
        results['sim_stats'] = arr_nfssd
    return results
def job_fssdJ1q_med(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD test with a Gaussian kernel, where the test locations are
    randomized, and the Gaussian width is set with the median heuristic.
    Use the full sample. No training/testing splits.

    p: an UnnormalizedDensity
    data_source: a DataSource
    tr, te: Data
    r: trial number (positive integer)
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        med = util.meddistance(X, subsample=1000)
        k = kernel.KGauss(med**2)
        V = util.fit_gaussian_draw(X, J, seed=r + 3)

        fssd_med = gof.FSSD(p, k, V, null_sim=null_sim, alpha=alpha)
        fssd_med_result = fssd_med.perform_test(data)
    return {
        'goftest': fssd_med,
        'test_result': fssd_med_result,
        'time_secs': t.secs,
    }
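# Usage sketch, illustrative only: a toy run of job_fssdJ1q_med. Assumes this
# script's imports (np, gof, kernel, util) plus the kgof package layout for
# kgof.data and kgof.density; the toy problem below is hypothetical.
def demo_job_fssdJ1q_med():
    import kgof.data as kdata
    import kgof.density as density

    # the jobs read the significance level from the module-level `alpha`
    global alpha
    alpha = 0.05

    d = 2
    # model p: standard isotropic Gaussian in d dimensions
    p = density.IsotropicNormal(np.zeros(d), 1.0)
    # data source q: mean-shifted Gaussian, so H0 is false
    ds = kdata.DSIsotropicNormal(np.hstack((1.0, np.zeros(d - 1))), 1.0)
    dat = ds.sample(n=500, seed=7)
    tr, te = dat.split_tr_te(tr_proportion=0.5, seed=28)

    result = job_fssdJ1q_med(p, ds, tr, te, r=0, J=5)
    print(result['test_result']['h0_rejected'],
          result['test_result']['pvalue'])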
def job_mmd_med(p, data_source, tr, te, r):
    """
    MMD test of Gretton et al., 2012 used as a goodness-of-fit test.
    Requires the ability to sample from p, i.e., get_datasource() of the
    UnnormalizedDensity p must return a non-None DataSource.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        pds = p.get_datasource()
        datY = pds.sample(data.sample_size(), seed=r + 294)
        Y = datY.data()
        XY = np.vstack((X, Y))
        # If p and q differ very little, the median of the pooled sample can
        # be very small, which may lead to spurious rejections of H0. Average
        # the median heuristics of X, Y, and the pooled sample instead.
        medx = util.meddistance(X, subsample=1000)
        medy = util.meddistance(Y, subsample=1000)
        medxy = util.meddistance(XY, subsample=1000)
        med_avg = (medx + medy + medxy) / 3.0
        k = kernel.KGauss(med_avg**2)

        mmd_test = mgof.QuadMMDGof(p, k, n_permute=400, alpha=alpha, seed=r)
        mmd_result = mmd_test.perform_test(data)
    return {'test_result': mmd_result, 'time_secs': t.secs}
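# Self-contained sketch, illustrative only: the averaged median heuristic
# used above, reproduced with scipy's pdist in place of util.meddistance
# (the subsampling step is omitted for brevity).
def demo_avg_median_heuristic():
    import numpy as np
    from scipy.spatial.distance import pdist

    def med_heuristic(Z):
        """Median of the pairwise Euclidean distances of the rows of Z."""
        return np.median(pdist(Z))

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))          # observed sample
    Y = rng.normal(size=(200, 3)) + 0.5    # sample drawn from the model p
    XY = np.vstack((X, Y))

    med_avg = (med_heuristic(X) + med_heuristic(Y) + med_heuristic(XY)) / 3.0
    gwidth2 = med_avg**2                   # squared bandwidth for KGauss
    print(gwidth2)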
def job_mmd_opt(p, data_source, tr, te, r):
    """
    MMD test of Gretton et al., 2012 used as a goodness-of-fit test.
    Requires the ability to sample from p, i.e., get_datasource() of the
    UnnormalizedDensity p must return a non-None DataSource.
    With optimization. Gaussian kernel.
    """
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        pds = p.get_datasource()
        datY = pds.sample(data.sample_size(), seed=r + 294)
        Y = datY.data()
        XY = np.vstack((X, Y))
        med = util.meddistance(XY, subsample=1000)

        # Construct a list of candidate kernels based on multiples of the
        # median heuristic.
        list_gwidth = (med**2) * (2.0**np.linspace(-4, 4, 30))
        list_gwidth.sort()
        candidate_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]

        mmd_opt = mgof.QuadMMDGofOpt(p, n_permute=300, alpha=alpha, seed=r)
        mmd_result = mmd_opt.perform_test(
            data,
            candidate_kernels=candidate_kernels,
            tr_proportion=tr_proportion,
            reg=1e-3)
    return {'test_result': mmd_result, 'time_secs': t.secs}
def compute(self):
    p = self.p
    data_source = self.data_source
    r = self.rep
    prob_param = self.prob_param
    job_func = self.job_func
    # sample_size is a global variable
    data = data_source.sample(sample_size, seed=r)
    with util.ContextTimer() as t:
        tr, te = data.split_tr_te(tr_proportion=tr_proportion, seed=r + 21)
        prob_label = self.prob_label
        logger.info("computing. %s. prob=%s, r=%d, param=%.3g"
                    % (job_func.__name__, prob_label, r, prob_param))

        job_result = job_func(p, data_source, tr, te, r)

        # create a SingleResult instance
        result = SingleResult(job_result)
        # submit the result to my own aggregator
        self.aggregator.submit_result(result)
        func_name = job_func.__name__
    logger.info("done. ex2: %s, prob=%s, r=%d, param=%.3g. Took: %.3g s"
                % (func_name, prob_label, r, prob_param, t.secs))

    # save result
    fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
        % (prob_label, func_name, sample_size, r, prob_param, alpha,
           tr_proportion)
    glo.ex_save_result(ex, job_result, prob_label, fname)
def job_mmd_opt(p, data_source, tr, te, r, model_sample):
    """
    MMD test with kernel optimization (QuadMMDGofOpt), using a pre-drawn
    sample from the model p (model_sample) instead of sampling inside the
    test.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        mmd = gof.QuadMMDGofOpt(p, alpha=args.alpha, seed=r)
        mmd_result = mmd.perform_test(data, model_sample)
    return {'test_result': mmd_result, 'time_secs': t.secs}
def perform_test(self, dat, model_sample, candidate_kernels=None,
                 return_mmdtest=False, tr_proportion=0.2, reg=1e-3):
    """
    dat: an instance of Data
    model_sample: a Data object of a sample drawn from the model p
    candidate_kernels: a list of Kernels to choose from
    tr_proportion: proportion of the sample used to choose the best kernel
    reg: regularization parameter for the test power criterion
    """
    with util.ContextTimer() as t:
        seed = self.seed
        p = self.p
        p_sample = model_sample
        xtr, xte = p_sample.split_tr_te(tr_proportion=tr_proportion,
                                        seed=seed + 18)
        # ytr, yte are of type data.Data
        ytr, yte = dat.split_tr_te(tr_proportion=tr_proportion,
                                   seed=seed + 12)

        # training and test data
        tr_tst_data = fdata.TSTData(xtr.data(), ytr.data())
        te_tst_data = fdata.TSTData(xte.data(), yte.data())

        if candidate_kernels is None:
            # Assume a Gaussian kernel. Construct a list of candidate
            # kernels based on multiples of the median heuristic.
            med = util.meddistance(tr_tst_data.stack_xy(), 1000)
            list_gwidth = (med**2) * (2.0**np.linspace(-4, 4, 10))
            list_gwidth.sort()
            candidate_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]

        alpha = self.alpha

        # grid search to choose the best Gaussian width
        besti, powers = tst.QuadMMDTest.grid_search_kernel(
            tr_tst_data, candidate_kernels, alpha, reg=reg)
        # perform the test with the selected kernel
        best_ker = candidate_kernels[besti]
        mmdtest = tst.QuadMMDTest(best_ker, self.n_permute, alpha=alpha)
        results = mmdtest.perform_test(te_tst_data)
        if return_mmdtest:
            results['mmdtest'] = mmdtest

    results['time_secs'] = t.secs
    return results
def job_fssdJ1q_imq_optv(p, data_source, tr, te, r, J=1, b=-0.5,
                         null_sim=None):
    """
    FSSD with optimization on tr. Test on te. Use an inverse multiquadric
    (IMQ) kernel. Optimize only the test locations (V). Fix the kernel
    parameters to b = -0.5, c = 1. These are the values recommended in

        Measuring Sample Quality with Kernels
        Jackson Gorham, Lester Mackey
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    Xtr = tr.data()
    with util.ContextTimer() as t:
        # IMQ kernel parameter c (b is passed in as an argument)
        c = 1.0
        # fit a Gaussian to the data and draw from it to initialize V0
        V0 = util.fit_gaussian_draw(Xtr, J, seed=r + 1, reg=1e-6)
        ops = {
            'reg': 1e-5,
            'max_iter': 30,
            'tol_fun': 1e-6,
            'disp': True,
            'locs_bounds_frac': 20.0,
        }
        V_opt, info = gof.IMQFSSD.optimize_locs(p, tr, b, c, V0, **ops)
        k_imq = kernel.KIMQ(b=b, c=c)

        # use the optimized parameters to construct the test
        fssd_imq = gof.FSSD(p, k_imq, V_opt, null_sim=null_sim, alpha=alpha)
        fssd_imq_result = fssd_imq.perform_test(te)
    return {
        'test_result': fssd_imq_result,
        'time_secs': t.secs,
        'goftest': fssd_imq,
        'opt_info': info,
    }
def job_kstein_med(p, data_source, tr, te, r):
    """
    Kernel Stein discrepancy test of Liu et al., 2016 and Chwialkowski et
    al., 2016. Use the full sample and a Gaussian kernel.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        med = util.meddistance(X, subsample=1000)
        k = kernel.KGauss(med**2)

        kstein = gof.KernelSteinTest(p, k, alpha=args.alpha,
                                     n_simulate=1000, seed=r)
        kstein_result = kstein.perform_test(data)
    return {'test_result': kstein_result, 'time_secs': t.secs}
def job_lin_kstein_med(p, data_source, tr, te, r):
    """
    Linear-time version of the kernel Stein discrepancy test of Liu et al.,
    2016 and Chwialkowski et al., 2016. Use the full sample.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        med = util.meddistance(X, subsample=1000)
        k = kernel.KGauss(med**2)

        lin_kstein = gof.LinearKernelSteinTest(p, k, alpha=alpha, seed=r)
        lin_kstein_result = lin_kstein.perform_test(data)
    return {'test_result': lin_kstein_result, 'time_secs': t.secs}
def job_fssdJ1q_opt(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD with optimization on tr. Test on te. Use a Gaussian kernel.
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    Xtr = tr.data()
    with util.ContextTimer() as t:
        # Use a grid search to initialize the Gaussian width.
        n_gwidth_cand = 5
        gwidth_factors = 2.0**np.linspace(-3, 3, n_gwidth_cand)
        med2 = util.meddistance(Xtr, 1000)**2

        # fit a Gaussian to the data and draw from it to initialize V0
        V0 = util.fit_gaussian_draw(Xtr, J, seed=r + 1, reg=1e-6)
        list_gwidth = med2 * gwidth_factors
        besti, objs = gof.GaussFSSD.grid_search_gwidth(p, tr, V0, list_gwidth)
        gwidth = list_gwidth[besti]
        assert util.is_real_num(gwidth), \
            'gwidth not real. Was %s' % str(gwidth)
        assert gwidth > 0, 'gwidth not positive. Was %.3g' % gwidth
        logging.info('After grid search, gwidth=%.3g' % gwidth)

        ops = {
            'reg': 1e-2,
            'max_iter': 40,
            'tol_fun': 1e-4,
            'disp': True,
            'locs_bounds_frac': 10.0,
            'gwidth_lb': 1e-1,
            'gwidth_ub': 1e4,
        }
        V_opt, gwidth_opt, info = gof.GaussFSSD.optimize_locs_widths(
            p, tr, gwidth, V0, **ops)

        # use the optimized parameters to construct the test
        k_opt = kernel.KGauss(gwidth_opt)
        fssd_opt = gof.FSSD(p, k_opt, V_opt, null_sim=null_sim, alpha=alpha)
        fssd_opt_result = fssd_opt.perform_test(te)
    return {
        'test_result': fssd_opt_result,
        'time_secs': t.secs,
        'goftest': fssd_opt,
        'opt_info': info,
    }
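# Usage sketch, illustrative only: the optimize-on-tr / test-on-te pattern
# of job_fssdJ1q_opt on a toy problem. Assumes this script's imports and the
# kgof package layout; the variance-inflated data source is hypothetical.
def demo_job_fssdJ1q_opt():
    import kgof.data as kdata
    import kgof.density as density

    global alpha
    alpha = 0.05

    d = 3
    p = density.IsotropicNormal(np.zeros(d), 1.0)
    # data with inflated variance, so the optimized FSSD should reject H0
    ds = kdata.DSIsotropicNormal(np.zeros(d), 2.0)
    dat = ds.sample(n=800, seed=3)

    # 20% of the sample tunes (gwidth, V); the held-out 80% runs the test,
    # so the tuning step does not bias the test's null distribution.
    tr, te = dat.split_tr_te(tr_proportion=0.2, seed=24)
    result = job_fssdJ1q_opt(p, ds, tr, te, r=0, J=5)
    print(result['test_result'])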
def job_fssdJ1q_imq_optbv(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD with optimization on tr. Test on te. Use an inverse multiquadric
    (IMQ) kernel. Optimize the test locations (V) and b. Fix c (in the
    kernel).
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    Xtr = tr.data()
    with util.ContextTimer() as t:
        # initial IMQ kernel parameters: b and c
        b0 = -0.5
        # fix c to this value
        c = 1.0
        c0 = c
        # fit a Gaussian to the data and draw from it to initialize V0
        V0 = util.fit_gaussian_draw(Xtr, J, seed=r + 1, reg=1e-6)
        ops = {
            'reg': 1e-5,
            'max_iter': 40,
            'tol_fun': 1e-6,
            'disp': True,
            'locs_bounds_frac': 20.0,
            # IMQ kernel parameter bounds. Setting c_lb = c_ub = c fixes c.
            'b_lb': -20,
            'c_lb': c,
            'c_ub': c,
        }
        V_opt, b_opt, c_opt, info = gof.IMQFSSD.optimize_locs_params(
            p, tr, b0, c0, V0, **ops)
        k_imq = kernel.KIMQ(b=b_opt, c=c_opt)

        # use the optimized parameters to construct the test
        fssd_imq = gof.FSSD(p, k_imq, V_opt, null_sim=null_sim, alpha=alpha)
        fssd_imq_result = fssd_imq.perform_test(te)
    return {
        'test_result': fssd_imq_result,
        'time_secs': t.secs,
        'goftest': fssd_imq,
        'opt_info': info,
    }
def perform_test(self, dat, op=None, return_metest=False):
    """
    dat: an instance of Data
    op: a dictionary specifying options for the optimization of the ME
        test. Can be None (use defaults).
    """
    with util.ContextTimer() as t:
        metest, tr_tst_data, te_tst_data = self._get_metest_opt(dat, op)

        # run the two-sample test on the held-out data
        results = metest.perform_test(te_tst_data)
    results['time_secs'] = t.secs
    if return_metest:
        results['metest'] = metest
    return results
def perform_test(self, dat, return_simulated_stats=False,
                 return_ustat_gram=False):
    """
    dat: an instance of Data
    """
    with util.ContextTimer() as t:
        alpha = self.alpha
        n_simulate = self.n_simulate
        X = dat.data()
        n = X.shape[0]

        _, H = self.compute_stat(dat, return_ustat_gram=True)
        test_stat = n * np.mean(H)

        # bootstrapping
        sim_stats = np.zeros(n_simulate)
        with util.NumpySeedContext(seed=self.seed):
            for i in range(n_simulate):
                W = self.bootstrapper(n)
                # n * [ (1/n^2) * \sum_i \sum_j h(x_i, x_j) w_i w_j ]
                # This is a bootstrap version of n*V_n.
                boot_stat = W.dot(H.dot(W / float(n)))
                sim_stats[i] = boot_stat

        # approximate p-value with the bootstrapped statistics
        pvalue = np.mean(sim_stats > test_stat)

    results = {
        'alpha': self.alpha,
        'pvalue': pvalue,
        'test_stat': test_stat,
        'h0_rejected': pvalue < alpha,
        'n_simulate': n_simulate,
        'time_secs': t.secs,
        'H_mu': H.mean(),
        'H_sigma': H.std(),
    }
    if return_simulated_stats:
        results['sim_stats'] = sim_stats
    if return_ustat_gram:
        results['H'] = H
    return results
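# Self-contained sketch, illustrative only: the weighted bootstrap above
# needs just the n x n Stein gram matrix H. Here H is a random symmetric
# stand-in, and Rademacher weights are used as one common choice of
# bootstrapper; neither is taken from a real test.
def demo_weighted_bootstrap():
    import numpy as np

    rng = np.random.default_rng(0)
    n, n_simulate = 100, 500
    A = rng.normal(size=(n, n))
    H = (A + A.T) / 2.0                  # symmetric stand-in "gram matrix"

    test_stat = n * np.mean(H)           # n * V_n

    sim_stats = np.zeros(n_simulate)
    for i in range(n_simulate):
        # Rademacher weights in {-1, +1}
        W = rng.choice(np.array([-1.0, 1.0]), size=n)
        # same quantity as above: n * [ (1/n^2) * sum_ij H_ij w_i w_j ]
        sim_stats[i] = W.dot(H.dot(W / float(n)))

    pvalue = np.mean(sim_stats > test_stat)
    print(pvalue)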
def job_kstein_imq(p, data_source, tr, te, r):
    """
    Kernel Stein discrepancy test of Liu et al., 2016 and Chwialkowski et
    al., 2016. Use the full sample. Use the inverse multiquadric (IMQ)
    kernel studied in

        Measuring Sample Quality with Kernels
        Gorham and Mackey, 2017.

    Parameters are fixed to the recommended values: beta = b = -0.5, c = 1.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        k = kernel.KIMQ(b=-0.5, c=1.0)

        kstein = gof.KernelSteinTest(p, k, alpha=alpha, n_simulate=1000,
                                     seed=r)
        kstein_result = kstein.perform_test(data)
    return {'test_result': kstein_result, 'time_secs': t.secs}
def perform_test(self, dat):
    """
    dat: an instance of Data
    """
    with util.ContextTimer() as t:
        seed = self.seed
        mmdtest = self.mmdtest
        p = self.p

        # Draw a sample from p of the same size as dat.
        ds = p.get_datasource()
        p_sample = ds.sample(dat.sample_size(), seed=seed + 12)

        # Run the two-sample test on p_sample and dat.
        tst_data = fdata.TSTData(p_sample.data(), dat.data())
        results = mmdtest.perform_test(tst_data)
    results['time_secs'] = t.secs
    return results
def job_mmd_dgauss_opt(p, data_source, tr, te, r):
    """
    MMD test of Gretton et al., 2012 used as a goodness-of-fit test.
    Requires the ability to sample from p, i.e., get_datasource() of the
    UnnormalizedDensity p must return a non-None DataSource.
    With optimization. Diagonal Gaussian kernel with one Gaussian width per
    dimension.
    """
    data = tr + te
    X = data.data()
    d = X.shape[1]
    with util.ContextTimer() as t:
        # median heuristic
        pds = p.get_datasource()
        datY = pds.sample(data.sample_size(), seed=r + 294)
        Y = datY.data()
        XY = np.vstack((X, Y))

        # compute the median heuristic for each dimension
        meds = np.zeros(d)
        for i in range(d):
            meds[i] = util.meddistance(XY[:, [i]], subsample=1000)

        # Construct a list of candidate kernels based on multiples of the
        # median heuristic.
        med_factors = 2.0**np.linspace(-4, 4, 20)
        candidate_kernels = [kernel.KDiagGauss((meds**2) * f)
                             for f in med_factors]

        mmd_opt = mgof.QuadMMDGofOpt(p, n_permute=300, alpha=alpha,
                                     seed=r + 56)
        mmd_result = mmd_opt.perform_test(
            data,
            candidate_kernels=candidate_kernels,
            tr_proportion=tr_proportion,
            reg=1e-3)
    return {'test_result': mmd_result, 'time_secs': t.secs}
def perform_test(self, dat):
    """
    dat: an instance of Data
    """
    with util.ContextTimer() as t:
        alpha = self.alpha
        X = dat.data()
        n = X.shape[0]

        # H: length-n vector of pointwise statistics
        _, H = self.compute_stat(dat, return_pointwise_stats=True)
        test_stat = np.sqrt(n / 2.0) * np.mean(H)
        stat_var = np.mean(H**2)
        pvalue = stats.norm.sf(test_stat, loc=0, scale=np.sqrt(stat_var))

    results = {
        'alpha': self.alpha,
        'pvalue': pvalue,
        'test_stat': test_stat,
        'h0_rejected': pvalue < alpha,
        'time_secs': t.secs,
    }
    return results
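# Self-contained sketch, illustrative only: unlike the bootstrap test above,
# this linear-time variant gets its p-value from a normal approximation.
# H here is random stand-in data for the length-n pointwise statistics.
def demo_normal_approx_pvalue():
    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(1)
    n = 400
    H = rng.normal(loc=0.1, scale=1.0, size=n)   # stand-in pointwise stats

    test_stat = np.sqrt(n / 2.0) * np.mean(H)    # asymptotically normal under H0
    stat_var = np.mean(H**2)                     # plug-in variance estimate
    # one-sided p-value: P( N(0, stat_var) > test_stat )
    pvalue = stats.norm.sf(test_stat, loc=0, scale=np.sqrt(stat_var))
    print(test_stat, pvalue)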
def optimize_locs(p, dat, b, c, test_locs0, reg=1e-5, max_iter=100,
                  tol_fun=1e-5, disp=False, locs_bounds_frac=100):
    """
    Optimize only the test locations by maximizing a test power criterion,
    keeping the kernel parameters b, c fixed to the specified values. dat
    should not be the same data as used in the actual test (i.e., it should
    be a held-out set). This function is deterministic.

    - p: an UnnormalizedDensity specifying the problem
    - dat: a Data object
    - b, c: kernel parameters of the IMQ kernel. Not optimized.
    - test_locs0: Jxd numpy array. Initial V.
    - reg: regularizer added to the variance so that the criterion becomes
      mean/sqrt(variance + reg)
    - max_iter: maximum number of gradient descent iterations
    - tol_fun: termination tolerance of the objective value
    - disp: True to print convergence messages
    - locs_bounds_frac: When making box bounds for the test_locs, extend
      the box defined by the coordinate-wise min-max by the std of each
      coordinate multiplied by this number.

    Return (V test_locs, optimization info log)
    """
    J = test_locs0.shape[0]
    X = dat.data()
    n, d = X.shape

    def obj(V):
        return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

    flatten = lambda V: np.reshape(V, -1)

    def unflatten(x):
        V = np.reshape(x, (J, d))
        return V

    def flat_obj(x):
        V = unflatten(x)
        return obj(V)

    # initial point
    x0 = flatten(test_locs0)

    # Make a box to bound the test locations.
    X_std = np.std(X, axis=0)
    # X_min: length-d array
    X_min = np.min(X, axis=0)
    X_max = np.max(X, axis=0)
    # V_lb: J x d
    V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
    V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
    # (J*d) x 2. list() so the bounds survive in Python 3, where zip is lazy.
    x0_bounds = list(zip(V_lb.reshape(-1), V_ub.reshape(-1)))

    # Optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    grad_obj = autograd.elementwise_grad(flat_obj)
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj,
            x0,
            method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-06,
            },
            jac=grad_obj,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    V_opt = unflatten(x_opt)
    return V_opt, opt_result
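# Self-contained toy instance, illustrative only: the flatten/unflatten plus
# box-bounded L-BFGS-B pattern above, with a quadratic objective standing in
# for the (negated) power criterion. Assumes autograd and scipy are
# installed; all names here are hypothetical.
def demo_flatten_lbfgsb():
    import autograd
    import autograd.numpy as np
    import scipy.optimize

    J, d = 2, 3
    target = np.arange(J * d, dtype=float).reshape(J, d)

    def obj(V):
        # toy objective: squared distance of the J x d locations to a target
        return np.sum((V - target)**2)

    def flat_obj(x):
        return obj(np.reshape(x, (J, d)))

    x0 = np.zeros(J * d)
    # box bounds per flattened coordinate, as in optimize_locs
    x0_bounds = list(zip(-10.0 * np.ones(J * d), 10.0 * np.ones(J * d)))

    grad_obj = autograd.elementwise_grad(flat_obj)
    opt = scipy.optimize.minimize(flat_obj, x0, method='L-BFGS-B',
                                  bounds=x0_bounds, jac=grad_obj)
    V_opt = np.reshape(opt['x'], (J, d))
    print(V_opt)   # should be close to `target`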
def optimize_locs_widths(p, dat, gwidth0, test_locs0, reg=1e-2, max_iter=100,
                         tol_fun=1e-5, disp=False, locs_bounds_frac=100,
                         gwidth_lb=None, gwidth_ub=None, use_2terms=False):
    """
    Optimize the test locations and the Gaussian kernel width by maximizing
    a test power criterion. dat should not be the same data as used in the
    actual test (i.e., it should be a held-out set). This function is
    deterministic.

    - p: an UnnormalizedDensity specifying the problem
    - dat: a Data object
    - gwidth0: initial value of the Gaussian width^2
    - test_locs0: Jxd numpy array. Initial V.
    - reg: regularizer added to the variance so that the criterion becomes
      mean/sqrt(variance + reg)
    - max_iter: maximum number of gradient descent iterations
    - tol_fun: termination tolerance of the objective value
    - disp: True to print convergence messages
    - locs_bounds_frac: When making box bounds for the test_locs, extend
      the box defined by the coordinate-wise min-max by the std of each
      coordinate multiplied by this number.
    - gwidth_lb: absolute lower bound on the Gaussian width^2. If None, a
      fraction of the median heuristic is used.
    - gwidth_ub: absolute upper bound on the Gaussian width^2. If None, a
      multiple of the median heuristic is used.
    - use_2terms: If True, then besides the signal-to-noise-ratio criterion,
      the objective function will also include the first term that is
      otherwise dropped.

    Return (V test_locs, Gaussian width, optimization info log)
    """
    J = test_locs0.shape[0]
    X = dat.data()
    n, d = X.shape

    # Parameterize the Gaussian width by its square root (and square it
    # later) to automatically enforce positivity.
    def obj(sqrt_gwidth, V):
        return -GaussFSSD.power_criterion(
            p, dat, sqrt_gwidth**2, V, reg=reg, use_2terms=use_2terms)

    flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

    def unflatten(x):
        sqrt_gwidth = x[0]
        V = np.reshape(x[1:], (J, d))
        return sqrt_gwidth, V

    def flat_obj(x):
        sqrt_gwidth, V = unflatten(x)
        return obj(sqrt_gwidth, V)

    # initial point
    x0 = flatten(np.sqrt(gwidth0), test_locs0)

    # Make sure the optimized gwidth is neither too small nor too large.
    fac_min = 1e-2
    fac_max = 1e2
    med2 = util.meddistance(X, subsample=1000)**2
    if gwidth_lb is None:
        gwidth_lb = max(fac_min * med2, 1e-3)
    if gwidth_ub is None:
        gwidth_ub = min(fac_max * med2, 1e5)

    # Make a box to bound the test locations.
    X_std = np.std(X, axis=0)
    # X_min: length-d array
    X_min = np.min(X, axis=0)
    X_max = np.max(X, axis=0)
    # V_lb: J x d
    V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
    V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
    # (J*d+1) x 2. Take the square root of the width bounds because we
    # parameterize with the square root.
    x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
    x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
    # list() so the bounds survive in Python 3, where zip is lazy.
    x0_bounds = list(zip(x0_lb, x0_ub))

    # Optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    grad_obj = autograd.elementwise_grad(flat_obj)
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj,
            x0,
            method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-06,
            },
            jac=grad_obj,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sq_gw_opt, V_opt = unflatten(x_opt)
    gw_opt = sq_gw_opt**2

    assert util.is_real_num(gw_opt), \
        'gw_opt is not real. Was %s' % str(gw_opt)
    return V_opt, gw_opt, opt_result
def optimize_locs_params(p, dat, b0, c0, test_locs0, reg=1e-2, max_iter=100,
                         tol_fun=1e-5, disp=False, locs_bounds_frac=100,
                         b_lb=-20.0, b_ub=-1e-4, c_lb=1e-6, c_ub=1e3):
    """
    Optimize the test locations and the two parameters (b and c) of the IMQ
    kernel by maximizing the test power criterion.

        k(x,y) = (c^2 + ||x-y||^2)^b  where c > 0 and b < 0

    dat should not be the same data as used in the actual test (i.e., it
    should be a held-out set). This function is deterministic.

    - p: an UnnormalizedDensity specifying the problem
    - dat: a Data object (training set)
    - b0: initial parameter value for b (in the kernel)
    - c0: initial parameter value for c (in the kernel)
    - test_locs0: Jxd numpy array. Initial V.
    - reg: regularizer added to the variance so that the criterion becomes
      mean/sqrt(variance + reg)
    - max_iter: maximum number of gradient descent iterations
    - tol_fun: termination tolerance of the objective value
    - disp: True to print convergence messages
    - locs_bounds_frac: When making box bounds for the test_locs, extend
      the box defined by the coordinate-wise min-max by the std of each
      coordinate multiplied by this number.
    - b_lb: absolute lower bound on b. b is always < 0.
    - b_ub: absolute upper bound on b
    - c_lb: absolute lower bound on c. c is always > 0.
    - c_ub: absolute upper bound on c

    Implementation note: the optimization parameterizes b via
    sqrt_neg_b = sqrt(-b); squaring and negating recovers b, which enforces
    b < 0 automatically. c needs no special parameterization since it enters
    the kernel as c^2; its absolute value is taken to keep it positive.

    Return (V test_locs, b, c, optimization info log)
    """
    J = test_locs0.shape[0]
    X = dat.data()
    n, d = X.shape

    def obj(sqrt_neg_b, c, V):
        b = -sqrt_neg_b**2
        return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

    flatten = lambda sqrt_neg_b, c, V: np.hstack(
        (sqrt_neg_b, c, V.reshape(-1)))

    def unflatten(x):
        sqrt_neg_b = x[0]
        c = x[1]
        V = np.reshape(x[2:], (J, d))
        return sqrt_neg_b, c, V

    def flat_obj(x):
        sqrt_neg_b, c, V = unflatten(x)
        return obj(sqrt_neg_b, c, V)

    # initial point
    b02 = np.sqrt(-b0)
    x0 = flatten(b02, c0, test_locs0)

    # Make a box to bound the test locations.
    X_std = np.std(X, axis=0)
    # X_min: length-d array
    X_min = np.min(X, axis=0)
    X_max = np.max(X, axis=0)
    # V_lb: J x d
    V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
    V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
    # (J*d+2) x 2. Bound the reparameterized values, not the originals.
    # For b2 := sqrt(-b), the constraint lb <= b <= ub < 0 becomes
    # sqrt(-ub) <= b2 <= sqrt(-lb). Note the swapped positions of ub and lb.
    x0_lb = np.hstack((np.sqrt(-b_ub), c_lb, np.reshape(V_lb, -1)))
    x0_ub = np.hstack((np.sqrt(-b_lb), c_ub, np.reshape(V_ub, -1)))
    # list() so the bounds survive in Python 3, where zip is lazy.
    x0_bounds = list(zip(x0_lb, x0_ub))

    # Optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    grad_obj = autograd.elementwise_grad(flat_obj)
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(
            flat_obj,
            x0,
            method='L-BFGS-B',
            bounds=x0_bounds,
            tol=tol_fun,
            options={
                'maxiter': max_iter,
                'ftol': tol_fun,
                'disp': disp,
                'gtol': 1.0e-06,
            },
            jac=grad_obj,
        )

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sqrt_neg_b, c, V_opt = unflatten(x_opt)
    b = -sqrt_neg_b**2

    assert util.is_real_num(b), 'b is not real. Was {}'.format(b)
    assert b < 0
    assert util.is_real_num(c), 'c is not real. Was {}'.format(c)
    assert c > 0
    return V_opt, b, c, opt_result
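# Self-contained numeric check, illustrative only: the swapped bound mapping
# for b under the reparameterization b2 = sqrt(-b) used above.
def demo_b_bound_mapping():
    import numpy as np

    b_lb, b_ub = -20.0, -1e-4                       # lb <= b <= ub < 0
    b2_lb, b2_ub = np.sqrt(-b_ub), np.sqrt(-b_lb)   # bounds on b2 = sqrt(-b)

    rng = np.random.default_rng(0)
    for _ in range(1000):
        b2 = rng.uniform(b2_lb, b2_ub)   # any feasible reparameterized value
        b = -b2**2                       # map back to the original parameter
        assert b_lb <= b <= b_ub < 0     # always lands in the original box
    print('bound mapping verified')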