Example #1
def job_me_opt(p, data_source, tr, te, r, J=5):
    """
    ME test of Jitkrittum et al., 2016 used as a goodness-of-fit test.
    Gaussian kernel. Optimize test locations and Gaussian width.
    """
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        #pds = p.get_datasource()
        #datY = pds.sample(data.sample_size(), seed=r+294)
        #Y = datY.data()
        #XY = np.vstack((X, Y))
        #med = util.meddistance(XY, subsample=1000)
        op = {
            'n_test_locs': J,
            'seed': r + 5,
            'max_iter': 40,
            'batch_proportion': 1.0,
            'locs_step_size': 1.0,
            'gwidth_step_size': 0.1,
            'tol_fun': 1e-4,
            'reg': 1e-4
        }
        # optimize on the training set
        # tr_proportion and alpha are module-level globals (cf. the
        # "sample_size is a global variable" comment in Example #6)
        me_opt = tgof.GaussMETestOpt(p,
                                     n_locs=J,
                                     tr_proportion=tr_proportion,
                                     alpha=alpha,
                                     seed=r + 111)

        me_result = me_opt.perform_test(data, op)
    return {'test_result': me_result, 'time_secs': t.secs}
Example #2
    def perform_test(self, dat, return_simulated_stats=False):
        """
        dat: an instance of Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            null_sim = self.null_sim
            n_simulate = null_sim.n_simulate
            X = dat.data()
            n = X.shape[0]
            J = self.V.shape[0]

            nfssd, fea_tensor = self.compute_stat(dat,
                                                  return_feature_tensor=True)
            sim_results = null_sim.simulate(self, dat, fea_tensor)
            arr_nfssd = sim_results['sim_stats']

            # approximate the p-value with the simulated null statistics
            pvalue = np.mean(arr_nfssd > nfssd)

        results = {
            'alpha': self.alpha,
            'pvalue': pvalue,
            'test_stat': nfssd,
            'h0_rejected': pvalue < alpha,
            'n_simulate': n_simulate,
            'time_secs': t.secs,
        }
        if return_simulated_stats:
            results['sim_stats'] = arr_nfssd
        return results
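
The p-value above is a plain Monte Carlo estimate: the fraction of statistics simulated under H0 that exceed the observed statistic. A minimal self-contained sketch of the same computation, with a toy chi-square null standing in for the FSSD null simulator:

import numpy as np

rng = np.random.default_rng(0)
observed_stat = 4.2
# Stand-in for null_sim.simulate(...): draws from the statistic's null distribution.
sim_stats = rng.chisquare(df=3, size=2000)
# Fraction of simulated statistics at least as large as the observed one.
pvalue = np.mean(sim_stats > observed_stat)
print(pvalue)  # roughly 0.24 for a chi-square(3) null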
Example #3
def job_fssdJ1q_med(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD test with a Gaussian kernel, where the test locations are randomized,
    and the Gaussian width is set with the median heuristic. Use full sample.
    No training/testing splits.

    p: an UnnormalizedDensity
    data_source: a DataSource
    tr, te: Data
    r: trial number (positive integer)
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        med = util.meddistance(X, subsample=1000)
        k = kernel.KGauss(med**2)
        V = util.fit_gaussian_draw(X, J, seed=r + 3)

        fssd_med = gof.FSSD(p, k, V, null_sim=null_sim, alpha=alpha)
        fssd_med_result = fssd_med.perform_test(data)
    return {
        'goftest': fssd_med,
        'test_result': fssd_med_result,
        'time_secs': t.secs
    }
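
util.meddistance implements the median heuristic: the Gaussian width is set from the median of pairwise Euclidean distances, computed on a subsample for speed. A minimal sketch of the same idea (median_heuristic below is a hypothetical stand-in, not the library function):

import numpy as np
from scipy.spatial.distance import pdist

def median_heuristic(X, subsample=1000, seed=0):
    # Subsample rows to keep the O(n^2) distance computation cheap.
    rng = np.random.default_rng(seed)
    if X.shape[0] > subsample:
        X = X[rng.choice(X.shape[0], subsample, replace=False)]
    # Median of all pairwise Euclidean distances.
    return np.median(pdist(X))

X = np.random.randn(500, 2)
med = median_heuristic(X)
gwidth2 = med**2  # squared width for the Gaussian kernel, as in KGauss(med**2)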
Example #4
def job_mmd_med(p, data_source, tr, te, r):
    """
    MMD test of Gretton et al., 2012 used as a goodness-of-fit test.
    Require the ability to sample from p i.e., the UnnormalizedDensity p has 
    to be able to return a non-None from get_datasource()
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        pds = p.get_datasource()
        datY = pds.sample(data.sample_size(), seed=r + 294)
        Y = datY.data()
        XY = np.vstack((X, Y))

        # If p and q differ very little, the median distance may be very
        # small, possibly causing H0 to be rejected when it should not be.
        medx = util.meddistance(X, subsample=1000)
        medy = util.meddistance(Y, subsample=1000)
        medxy = util.meddistance(XY, subsample=1000)
        med_avg = (medx + medy + medxy) / 3.0
        k = kernel.KGauss(med_avg**2)

        mmd_test = mgof.QuadMMDGof(p, k, n_permute=400, alpha=alpha, seed=r)
        mmd_result = mmd_test.perform_test(data)
    return {'test_result': mmd_result, 'time_secs': t.secs}
Example #5
def job_mmd_opt(p, data_source, tr, te, r):
    """
    MMD test of Gretton et al., 2012 used as a goodness-of-fit test.
    Require the ability to sample from p i.e., the UnnormalizedDensity p has 
    to be able to return a non-None from get_datasource()

    With optimization. Gaussian kernel.
    """
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        pds = p.get_datasource()
        datY = pds.sample(data.sample_size(), seed=r + 294)
        Y = datY.data()
        XY = np.vstack((X, Y))

        med = util.meddistance(XY, subsample=1000)

        # Construct a list of kernels to try based on multiples of the median
        # heuristic
        #list_gwidth = np.hstack( (np.linspace(20, 40, 10), (med**2)
        #    *(2.0**np.linspace(-2, 2, 20) ) ) )
        list_gwidth = (med**2) * (2.0**np.linspace(-4, 4, 30))
        list_gwidth.sort()
        candidate_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]

        mmd_opt = mgof.QuadMMDGofOpt(p, n_permute=300, alpha=alpha, seed=r)
        mmd_result = mmd_opt.perform_test(data,
                                          candidate_kernels=candidate_kernels,
                                          tr_proportion=tr_proportion,
                                          reg=1e-3)
    return {'test_result': mmd_result, 'time_secs': t.secs}
Example #6
    def compute(self):

        p = self.p
        data_source = self.data_source
        r = self.rep
        prob_param = self.prob_param
        job_func = self.job_func
        # sample_size is a global variable
        data = data_source.sample(sample_size, seed=r)
        with util.ContextTimer() as t:
            tr, te = data.split_tr_te(tr_proportion=tr_proportion, seed=r + 21)
            prob_label = self.prob_label
            logger.info("computing. %s. prob=%s, r=%d,\
                    param=%.3g" %
                        (job_func.__name__, prob_label, r, prob_param))

            job_result = job_func(p, data_source, tr, te, r)

            # create ScalarResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = job_func.__name__
        logger.info("done. ex2: %s, prob=%s, r=%d, param=%.3g. Took: %.3g s " %
                    (func_name, prob_label, r, prob_param, t.secs))

        # save result
        fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
                % (prob_label, func_name, sample_size, r, prob_param, alpha,
                   tr_proportion)
        glo.ex_save_result(ex, job_result, prob_label, fname)
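
To make the filename pattern concrete, here is what it produces for one hypothetical parameter setting (the values below are made up for illustration):

# Hypothetical values, for illustration only.
prob_label, func_name = 'gmd', 'job_fssdJ1q_med'
sample_size, r, prob_param, alpha, tr_proportion = 1000, 3, 5.0, 0.05, 0.2
fname = '%s-%s-n%d_r%d_p%g_a%.3f_trp%.2f.p' \
        % (prob_label, func_name, sample_size, r, prob_param, alpha,
           tr_proportion)
print(fname)  # gmd-job_fssdJ1q_med-n1000_r3_p5_a0.050_trp0.20.p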
Example #7
def job_mmd_opt(p, data_source, tr, te, r, model_sample):
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        mmd = gof.QuadMMDGofOpt(p, alpha=args.alpha, seed=r)
        mmd_result = mmd.perform_test(data, model_sample)
    return {'test_result': mmd_result, 'time_secs': t.secs}
Example #8
    def perform_test(self,
                     dat,
                     model_sample,
                     candidate_kernels=None,
                     return_mmdtest=False,
                     tr_proportion=0.2,
                     reg=1e-3):
        """
        dat: an instance of Data
        candidate_kernels: a list of Kernel's to choose from
        tr_proportion: proportion of sample to be used to choosing the best
            kernel
        reg: regularization parameter for the test power criterion
        """
        with util.ContextTimer() as t:
            seed = self.seed
            p = self.p
            #ds = p.get_datasource()
            #p_sample = ds.sample(dat.sample_size(), seed=seed+77)
            p_sample = model_sample
            xtr, xte = p_sample.split_tr_te(tr_proportion=tr_proportion,
                                            seed=seed + 18)
            # ytr, yte are of type data.Data
            ytr, yte = dat.split_tr_te(tr_proportion=tr_proportion,
                                       seed=seed + 12)

            # training and test data
            tr_tst_data = fdata.TSTData(xtr.data(), ytr.data())
            te_tst_data = fdata.TSTData(xte.data(), yte.data())

            if candidate_kernels is None:
                # Assume a Gaussian kernel. Construct a list of
                # kernels to try based on multiples of the median heuristic
                med = util.meddistance(tr_tst_data.stack_xy(), 1000)
                list_gwidth = np.hstack(
                    ((med**2) * (2.0**np.linspace(-4, 4, 10))))
                list_gwidth.sort()
                candidate_kernels = [kernel.KGauss(gw2) for gw2 in list_gwidth]

            alpha = self.alpha

            # grid search to choose the best Gaussian width
            besti, powers = tst.QuadMMDTest.grid_search_kernel(
                tr_tst_data, candidate_kernels, alpha, reg=reg)
            # perform test
            best_ker = candidate_kernels[besti]
            mmdtest = tst.QuadMMDTest(best_ker, self.n_permute, alpha=alpha)
            results = mmdtest.perform_test(te_tst_data)
            if return_mmdtest:
                results['mmdtest'] = mmdtest

        results['time_secs'] = t.secs
        return results
Example #9
def job_fssdJ1q_imq_optv(p,
                         data_source,
                         tr,
                         te,
                         r,
                         J=1,
                         b=-0.5,
                         null_sim=None):
    """
    FSSD with optimization on tr. Test on te. Use an inverse multiquadric
    kernel (IMQ). Optimize only the test locations (V). Fix the kernel
    parameters to b = -0.5, c=1. These are the recommended values from

        Measuring Sample Quality with Kernels
        Jackson Gorham, Lester Mackey
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    Xtr = tr.data()
    with util.ContextTimer() as t:
        # IMQ kernel parameters: b and c
        c = 1.0

        # fit a Gaussian to the data and draw to initialize V0
        V0 = util.fit_gaussian_draw(Xtr, J, seed=r + 1, reg=1e-6)

        ops = {
            'reg': 1e-5,
            'max_iter': 30,
            'tol_fun': 1e-6,
            'disp': True,
            'locs_bounds_frac': 20.0,
        }

        V_opt, info = gof.IMQFSSD.optimize_locs(p, tr, b, c, V0, **ops)

        k_imq = kernel.KIMQ(b=b, c=c)

        # Use the optimized parameters to construct a test
        fssd_imq = gof.FSSD(p, k_imq, V_opt, null_sim=null_sim, alpha=alpha)
        fssd_imq_result = fssd_imq.perform_test(te)

    return {
        'test_result': fssd_imq_result,
        'time_secs': t.secs,
        'goftest': fssd_imq,
        'opt_info': info,
    }
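
For reference, the IMQ kernel used here is k(x, y) = (c^2 + ||x - y||^2)^b with b < 0 and c > 0 (the form is spelled out in the optimize_locs_params docstring in Example #22). A minimal numpy sketch of what kernel.KIMQ is assumed to compute:

import numpy as np

def imq_kernel(X, Y, b=-0.5, c=1.0):
    # k(x, y) = (c^2 + ||x - y||^2)^b, with c > 0 and b < 0.
    sq_dists = np.sum((X[:, None, :] - Y[None, :, :])**2, axis=2)
    return (c**2 + sq_dists)**b

X = np.random.randn(4, 2)
K = imq_kernel(X, X)  # 4x4 Gram matrix; with c=1, the diagonal is all ones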
Example #10
def job_kstein_med(p, data_source, tr, te, r):
    """
    Kernel Stein discrepancy test of Liu et al., 2016 and Chwialkowski et al.,
    2016. Use full sample. Use Gaussian kernel.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        med = util.meddistance(X, subsample=1000)
        k = kernel.KGauss(med**2)

        kstein = gof.KernelSteinTest(p, k, alpha=args.alpha, n_simulate=1000,
                                     seed=r)
        kstein_result = kstein.perform_test(data)
    return {'test_result': kstein_result, 'time_secs': t.secs}
Example #11
def job_lin_kstein_med(p, data_source, tr, te, r):
    """
    Linear-time version of the kernel Stein discrepancy test of Liu et al.,
    2016 and Chwialkowski et al., 2016. Use full sample.
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        # median heuristic
        med = util.meddistance(X, subsample=1000)
        k = kernel.KGauss(med**2)

        lin_kstein = gof.LinearKernelSteinTest(p, k, alpha=alpha, seed=r)
        lin_kstein_result = lin_kstein.perform_test(data)
    return {'test_result': lin_kstein_result, 'time_secs': t.secs}
Example #12
def job_fssdJ1q_opt(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD with optimization on tr. Test on te. Use a Gaussian kernel.
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    Xtr = tr.data()
    with util.ContextTimer() as t:
        # Use grid search to initialize the gwidth
        n_gwidth_cand = 5
        gwidth_factors = 2.0**np.linspace(-3, 3, n_gwidth_cand)
        med2 = util.meddistance(Xtr, 1000)**2

        k = kernel.KGauss(med2 * 2)
        # fit a Gaussian to the data and draw to initialize V0
        V0 = util.fit_gaussian_draw(Xtr, J, seed=r + 1, reg=1e-6)
        list_gwidth = np.hstack(((med2) * gwidth_factors))
        besti, objs = gof.GaussFSSD.grid_search_gwidth(p, tr, V0, list_gwidth)
        gwidth = list_gwidth[besti]
        assert util.is_real_num(
            gwidth), 'gwidth not real. Was %s' % str(gwidth)
        assert gwidth > 0, 'gwidth not positive. Was %.3g' % gwidth
        logging.info('After grid search, gwidth=%.3g' % gwidth)

        ops = {
            'reg': 1e-2,
            'max_iter': 40,
            'tol_fun': 1e-4,
            'disp': True,
            'locs_bounds_frac': 10.0,
            'gwidth_lb': 1e-1,
            'gwidth_ub': 1e4,
        }

        V_opt, gwidth_opt, info = gof.GaussFSSD.optimize_locs_widths(
            p, tr, gwidth, V0, **ops)
        # Use the optimized parameters to construct a test
        k_opt = kernel.KGauss(gwidth_opt)
        fssd_opt = gof.FSSD(p, k_opt, V_opt, null_sim=null_sim, alpha=alpha)
        fssd_opt_result = fssd_opt.perform_test(te)
    return {
        'test_result': fssd_opt_result,
        'time_secs': t.secs,
        'goftest': fssd_opt,
        'opt_info': info,
    }
Example #13
def job_fssdJ1q_imq_optbv(p, data_source, tr, te, r, J=1, null_sim=None):
    """
    FSSD with optimization on tr. Test on te. Use an inverse multiquadric
    kernel (IMQ). Optimize the test locations (V), and b. Fix c (in the kernel)
    """
    if null_sim is None:
        null_sim = gof.FSSDH0SimCovObs(n_simulate=2000, seed=r)

    Xtr = tr.data()
    with util.ContextTimer() as t:
        # Initial IMQ kernel parameters: b and c
        b0 = -0.5
        # Fix c to this value
        c = 1.0
        c0 = c

        # fit a Gaussian to the data and draw to initialize V0
        V0 = util.fit_gaussian_draw(Xtr, J, seed=r + 1, reg=1e-6)

        ops = {
            'reg': 1e-5,
            'max_iter': 40,
            'tol_fun': 1e-6,
            'disp': True,
            'locs_bounds_frac': 20.0,
            # IMQ kernel bounds
            'b_lb': -20,
            'c_lb': c,
            'c_ub': c,
        }

        V_opt, b_opt, c_opt, info = gof.IMQFSSD.optimize_locs_params(
            p, tr, b0, c0, V0, **ops)

        k_imq = kernel.KIMQ(b=b_opt, c=c_opt)

        # Use the optimized parameters to construct a test
        fssd_imq = gof.FSSD(p, k_imq, V_opt, null_sim=null_sim, alpha=alpha)
        fssd_imq_result = fssd_imq.perform_test(te)

    return {
        'test_result': fssd_imq_result,
        'time_secs': t.secs,
        'goftest': fssd_imq,
        'opt_info': info,
    }
Example #14
    def perform_test(self, dat, op=None, return_metest=False):
        """
        dat: an instance of Data
        op: a dictionary specifying options for the optimization of the ME test.
            Can be None (use default).
        """

        with util.ContextTimer() as t:
            metest, tr_tst_data, te_tst_data = self._get_metest_opt(dat, op)

            # Run the two-sample test 
            results = metest.perform_test(te_tst_data)

        results['time_secs'] = t.secs
        if return_metest:
            results['metest'] = metest
        return results
Example #15
    def perform_test(self,
                     dat,
                     return_simulated_stats=False,
                     return_ustat_gram=False):
        """
        dat: a instance of Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            n_simulate = self.n_simulate
            X = dat.data()
            n = X.shape[0]

            _, H = self.compute_stat(dat, return_ustat_gram=True)
            test_stat = n * np.mean(H)
            # bootstrapping
            sim_stats = np.zeros(n_simulate)
            with util.NumpySeedContext(seed=self.seed):
                for i in range(n_simulate):
                    W = self.bootstrapper(n)
                    # n * [ (1/n^2) * \sum_i \sum_j h(x_i, x_j) w_i w_j ]
                    boot_stat = W.dot(H.dot(old_div(W, float(n))))
                    # This is a bootstrap version of n*V_n
                    sim_stats[i] = boot_stat

            # approximate the p-value with the bootstrap statistics
            pvalue = np.mean(sim_stats > test_stat)

        results = {
            'alpha': self.alpha,
            'pvalue': pvalue,
            'test_stat': test_stat,
            'h0_rejected': pvalue < alpha,
            'n_simulate': n_simulate,
            'time_secs': t.secs,
            'H_mu': H.mean(),
            'H_sigma': H.std(),
        }
        if return_simulated_stats:
            results['sim_stats'] = sim_stats
        if return_ustat_gram:
            results['H'] = H

        return results
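
The simulation loop above is a multiplier (weighted) bootstrap: each round draws a weight vector W from self.bootstrapper and recomputes the quadratic form n * (1/n^2) * W^T H W = W^T H W / n on the fixed gram matrix H. A minimal sketch with Rademacher multipliers, one common choice (the actual distribution depends on the bootstrapper used):

import numpy as np

rng = np.random.default_rng(0)
n = 200
H = rng.standard_normal((n, n))
H = (H + H.T) / 2.0  # symmetric stand-in for the U-statistic gram matrix

n_simulate = 2000
sim_stats = np.empty(n_simulate)
for i in range(n_simulate):
    W = rng.choice([-1.0, 1.0], size=n)  # Rademacher multipliers
    sim_stats[i] = W @ H @ W / n         # bootstrap version of n * V_n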
Example #16
def job_kstein_imq(p, data_source, tr, te, r):
    """
    Kernel Stein discrepancy test of Liu et al., 2016 and Chwialkowski et al.,
    2016. Use full sample. Use the inverse multiquadric kernel (IMQ) studied 
    in 

    Measuring Sample Quality with Kernels
    Gorham and Mackey 2017. 

    Parameters are fixed to the recommented values: beta = b = -0.5, c = 1. 
    """
    # full data
    data = tr + te
    X = data.data()
    with util.ContextTimer() as t:
        k = kernel.KIMQ(b=-0.5, c=1.0)

        kstein = gof.KernelSteinTest(p, k, alpha=alpha, n_simulate=1000,
                                     seed=r)
        kstein_result = kstein.perform_test(data)
    return {'test_result': kstein_result, 'time_secs': t.secs}
Example #17
    def perform_test(self, dat):
        """
        dat: an instance of Data
        """
        with util.ContextTimer() as t:
            seed = self.seed
            mmdtest = self.mmdtest
            p = self.p

            # Draw a sample from p. The sample size is the same as that of dat.
            ds = p.get_datasource()
            p_sample = ds.sample(dat.sample_size(), seed=seed + 12)

            # Run the two-sample test on p_sample and dat
            # Make a two-sample test data
            tst_data = fdata.TSTData(p_sample.data(), dat.data())
            # Test
            results = mmdtest.perform_test(tst_data)

        results['time_secs'] = t.secs
        return results
Example #18
def job_mmd_dgauss_opt(p, data_source, tr, te, r):
    """
    MMD test of Gretton et al., 2012 used as a goodness-of-fit test.
    Require the ability to sample from p i.e., the UnnormalizedDensity p has 
    to be able to return a non-None from get_datasource()

    With optimization. Diagonal Gaussian kernel where there is one Gaussian width
    for each dimension.
    """
    data = tr + te
    X = data.data()
    d = X.shape[1]
    with util.ContextTimer() as t:
        # median heuristic
        pds = p.get_datasource()
        datY = pds.sample(data.sample_size(), seed=r + 294)
        Y = datY.data()
        XY = np.vstack((X, Y))

        # Get the median heuristic for each dimension
        meds = np.zeros(d)
        for i in range(d):
            medi = util.meddistance(XY[:, [i]], subsample=1000)
            meds[i] = medi

        # Construct a list of kernels to try based on multiples of the median
        # heuristic
        med_factors = 2.0**np.linspace(-4, 4, 20)
        candidate_kernels = []
        for i in range(len(med_factors)):
            ki = kernel.KDiagGauss((meds**2) * med_factors[i])
            candidate_kernels.append(ki)

        mmd_opt = mgof.QuadMMDGofOpt(p, n_permute=300, alpha=alpha, seed=r + 56)
        mmd_result = mmd_opt.perform_test(data,
                                          candidate_kernels=candidate_kernels,
                                          tr_proportion=tr_proportion,
                                          reg=1e-3)
    return {'test_result': mmd_result, 'time_secs': t.secs}
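
kernel.KDiagGauss takes a vector of squared widths, one per dimension. A minimal sketch of the kind of kernel assumed here (the exact scaling convention inside KDiagGauss may differ):

import numpy as np

def diag_gauss_kernel(X, Y, widths2):
    # k(x, y) = exp(-sum_i (x_i - y_i)^2 / (2 * w_i)), one squared width w_i
    # per dimension; widths2 is a length-d array.
    diffs2 = (X[:, None, :] - Y[None, :, :])**2
    return np.exp(-0.5 * np.sum(diffs2 / widths2, axis=2))

X = np.random.randn(5, 3)
K = diag_gauss_kernel(X, X, widths2=np.array([0.5, 1.0, 2.0]))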
Example #19
    def perform_test(self, dat):
        """
        dat: a instance of Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            X = dat.data()
            n = X.shape[0]

            # H: length-n vector
            _, H = self.compute_stat(dat, return_pointwise_stats=True)
            test_stat = np.sqrt(n / 2) * np.mean(H)
            stat_var = np.mean(H**2)
            pvalue = stats.norm.sf(test_stat, loc=0, scale=np.sqrt(stat_var))

        results = {
            'alpha': self.alpha,
            'pvalue': pvalue,
            'test_stat': test_stat,
            'h0_rejected': pvalue < alpha,
            'time_secs': t.secs,
        }
        return results
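
Under H0 the linear-time statistic is asymptotically normal, so the p-value is a Gaussian upper-tail probability with the plugged-in variance estimate. A small numeric check with made-up numbers:

import numpy as np
from scipy import stats

test_stat, stat_var = 2.1, 1.0
pvalue = stats.norm.sf(test_stat, loc=0, scale=np.sqrt(stat_var))
print(pvalue)  # ~0.0179, so H0 is rejected at alpha = 0.05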
Example #20
    def optimize_locs(p,
                      dat,
                      b,
                      c,
                      test_locs0,
                      reg=1e-5,
                      max_iter=100,
                      tol_fun=1e-5,
                      disp=False,
                      locs_bounds_frac=100):
        """
        Optimize just the test locations by maximizing a test power criterion,
        keeping the kernel parameters b, c fixed to the specified values. data
        should not be the same data as used in the actual test (i.e., should be
        a held-out set). This function is deterministic.

        - p: an UnnormalizedDensity specifying the problem
        - dat: a Data object
        - b, c: kernel parameters of the IMQ kernel. Not optimized.
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        
        Return (optimized test locations V, optimization info log)
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        def obj(V):
            return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

        flatten = lambda V: np.reshape(V, -1)

        def unflatten(x):
            V = np.reshape(x, (J, d))
            return V

        def flat_obj(x):
            V = unflatten(x)
            return obj(V)

        # Initial point
        x0 = flatten(test_locs0)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)
        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
        # (J*d) x 2: one (lb, ub) pair per flattened coordinate. list() is
        # needed since zip is lazy in Python 3.
        x0_bounds = list(zip(V_lb.reshape(-1), V_ub.reshape(-1)))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-06,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        V_opt = unflatten(x_opt)
        return V_opt, opt_result
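
The flatten/unflatten pair plus per-coordinate box bounds is the standard recipe for handing a matrix-valued variable to scipy's L-BFGS-B. A toy instance of the same pattern, with a simple quadratic in place of the power criterion:

import numpy as np
import scipy.optimize

J, d = 2, 3
target = np.arange(J * d, dtype=float).reshape(J, d)

def flat_obj(x):
    # Unflatten the vector back into a J x d matrix, then evaluate.
    V = x.reshape(J, d)
    return np.sum((V - target)**2)

x0 = np.zeros(J * d)
# One (lb, ub) pair per flattened coordinate.
bounds = list(zip(np.full(J * d, -10.0), np.full(J * d, 10.0)))
res = scipy.optimize.minimize(flat_obj, x0, method='L-BFGS-B', bounds=bounds)
V_opt = res.x.reshape(J, d)  # recovers target up to tolerance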
Example #21
    def optimize_locs_widths(
        p,
        dat,
        gwidth0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        gwidth_lb=None,
        gwidth_ub=None,
        use_2terms=False,
    ):
        """
        Optimize the test locations and the Gaussian kernel width by 
        maximizing a test power criterion. data should not be the same data as
        used in the actual test (i.e., should be a held-out set). 
        This function is deterministic.

        - p: an UnnormalizedDensity specifying the problem
        - dat: a Data object
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - gwidth0: initial value of the Gaussian width^2
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        - gwidth_lb: absolute lower bound on the Gaussian width^2
        - gwidth_ub: absolute upper bound on the Gaussian width^2
        - use_2terms: If True, then besides the signal-to-noise ratio
          criterion, the objective function will also include the first term
          that is dropped.

        - If gwidth_lb or gwidth_ub is None, it is set automatically based on
            the median heuristic (see the code below).

        Return (optimized test locations V, Gaussian width, optimization info
            log)
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        # Parameterize the Gaussian width with its square root (then square later)
        # to automatically enforce the positivity.
        def obj(sqrt_gwidth, V):
            return -GaussFSSD.power_criterion(
                p, dat, sqrt_gwidth**2, V, reg=reg, use_2terms=use_2terms)

        flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

        def unflatten(x):
            sqrt_gwidth = x[0]
            V = np.reshape(x[1:], (J, d))
            return sqrt_gwidth, V

        def flat_obj(x):
            sqrt_gwidth, V = unflatten(x)
            return obj(sqrt_gwidth, V)

        # gradient
        #grad_obj = autograd.elementwise_grad(flat_obj)
        # Initial point
        x0 = flatten(np.sqrt(gwidth0), test_locs0)

        # Make sure the optimized gwidth is not too small or too large.
        fac_min = 1e-2
        fac_max = 1e2
        med2 = util.meddistance(X, subsample=1000)**2
        if gwidth_lb is None:
            gwidth_lb = max(fac_min * med2, 1e-3)
        if gwidth_ub is None:
            gwidth_ub = min(fac_max * med2, 1e5)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)
        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
        # (J*d+1) x 2. Take square root because we parameterize with the square
        # root
        x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))  # list(): zip is lazy in Python 3

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-06,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sq_gw_opt, V_opt = unflatten(x_opt)
        gw_opt = sq_gw_opt**2

        assert util.is_real_num(
            gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)

        return V_opt, gw_opt, opt_result
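
Parameterizing the width by its square root turns the positivity constraint into an unconstrained problem: optimize s freely and use s**2 wherever the width is needed. A minimal illustration with a toy objective:

import scipy.optimize

def neg_power_criterion(gwidth2):
    # Toy stand-in for the (negated) power criterion; best at gwidth2 = 4.
    return (gwidth2 - 4.0)**2

# Optimize over s = sqrt(gwidth2); squaring guarantees gwidth2 >= 0.
res = scipy.optimize.minimize(lambda s: neg_power_criterion(s[0]**2), x0=[1.0])
gwidth2_opt = res.x[0]**2  # ~4.0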
Example #22
    def optimize_locs_params(
        p,
        dat,
        b0,
        c0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        b_lb=-20.0,
        b_ub=-1e-4,
        c_lb=1e-6,
        c_ub=1e3,
    ):
        """
        Optimize the test locations and the the two parameters (b and c) of the
        IMQ kernel by maximizing the test power criterion. 
             k(x,y) = (c^2 + ||x-y||^2)^b 
            where c > 0 and b < 0. 
        data should not be the same data as used in the actual test (i.e.,
        should be a held-out set). This function is deterministic.

        - p: UnnormalizedDensity specifying the problem.
        - b0: initial parameter value for b (in the kernel)
        - c0: initial parameter value for c (in the kernel)
        - dat: a Data object (training set)
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        - b_lb: absolute lower bound on b. b is always < 0.
        - b_ub: absolute upper bound on b
        - c_lb: absolute lower bound on c. c is always > 0.
        - c_ub: absolute upper bound on c

        Return (optimized test locations V, b, c, optimization info log)

        Implementation note: in the optimization, b is parameterized by
        b2 = sqrt(-b); square and negate to recover b. c is not parameterized
        in any special way since it enters the kernel as c^2. The absolute
        value of c is taken to make sure it is positive.
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        def obj(sqrt_neg_b, c, V):
            b = -sqrt_neg_b**2
            return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

        flatten = lambda sqrt_neg_b, c, V: np.hstack(
            (sqrt_neg_b, c, V.reshape(-1)))

        def unflatten(x):
            sqrt_neg_b = x[0]
            c = x[1]
            V = np.reshape(x[2:], (J, d))
            return sqrt_neg_b, c, V

        def flat_obj(x):
            sqrt_neg_b, c, V = unflatten(x)
            return obj(sqrt_neg_b, c, V)

        # gradient
        #grad_obj = autograd.elementwise_grad(flat_obj)
        # Initial point
        b02 = np.sqrt(-b0)
        x0 = flatten(b02, c0, test_locs0)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)

        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))

        # (J*d+2) x 2. Make sure to bound the reparameterized values (not the
        # originals). For b, define b2 := sqrt(-b). Then
        #     lb <= b <= ub < 0
        # is equivalent to
        #     sqrt(-ub) <= b2 <= sqrt(-lb).
        # Note the flipped roles of ub and lb.
        x0_lb = np.hstack((np.sqrt(-b_ub), c_lb, np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(-b_lb), c_ub, np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))  # list(): zip is lazy in Python 3

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-06,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sqrt_neg_b, c, V_opt = unflatten(x_opt)
        b = -sqrt_neg_b**2
        assert util.is_real_num(b), 'b is not real. Was {}'.format(b)
        assert b < 0
        assert util.is_real_num(c), 'c is not real. Was {}'.format(c)
        assert c > 0

        return V_opt, b, c, opt_result
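
The flipped bounds under the b2 = sqrt(-b) reparameterization are easy to sanity-check numerically:

import numpy as np

b_lb, b_ub = -20.0, -1e-4                      # lb <= b <= ub < 0
b2_lb, b2_ub = np.sqrt(-b_ub), np.sqrt(-b_lb)  # note the flipped roles

b2 = np.random.uniform(b2_lb, b2_ub, size=10000)
b = -b2**2
assert np.all((b_lb <= b) & (b <= b_ub))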