Example No. 1
def met_gfssdJ1_3sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    FSSD-based model comparison test
        * Use J=1 test location by default (in the set V=W). 
        * 3sopt = optimize the test locations by maximizing the 3-model test's
        power criterion. There is only one set of test locations.
        * One Gaussian kernel for the two FSSD statistics. Optimize the
        Gaussian width
    """
    if not P.has_unnormalized_density() or not Q.has_unnormalized_density():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    p = P.get_unnormalized_density()
    q = Q.get_unnormalized_density()
    # sample some data
    datr = sample_pqr(None, None, data_source, n, r, only_from_r=True)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        datrtr, datrte = datr.split_tr_te(tr_proportion=tr_proportion, seed=r)
        Ztr = datrtr.data()

        # median heuristic to set the Gaussian widths
        medz = util.meddistance(Ztr, subsample=1000)
        gwidth0 = medz**2
        # pick a subset of points in the training set for V, W
        V0 = util.subsample_rows(Ztr, J, seed=r + 2)

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-3,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 100,
            'gwidth_lb': 0.1**2,
            'gwidth_ub': 10**2,
        }

        V_opt, gw_opt, opt_info = mct.DC_GaussFSSD.optimize_power_criterion(
            p, q, datrtr, V0, gwidth0, **opt_options)

        dcfssd_opt = mct.DC_GaussFSSD(p,
                                      q,
                                      gw_opt,
                                      gw_opt,
                                      V_opt,
                                      V_opt,
                                      alpha=alpha)
        dcfssd_opt_result = dcfssd_opt.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':dcfssd_opt,
        'test_result': dcfssd_opt_result,
        'time_secs': t.secs
    }
Example No. 2
    def perform_test(self, dat):
        """
        :param dat: an instance of kmod.data.Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            X = dat.data()
            n = X.shape[0]
            #mean and variance are not yet scaled by \sqrt{n}
            # The variance is the same for both H0 and H1.
            mean_h1, var = self.get_H1_mean_variance(dat)
            stat = (n**0.5) * mean_h1
            null_std = var**0.5
            if null_std <= 1e-6:
                log.l().warning(
                    'SD of the null distribution is too small. Was {}. Will not reject H0.'
                    .format(null_std))
                pval = np.inf
            else:
                # Assume the mean of the null distribution is 0
                pval = stats.norm.sf(stat, loc=0, scale=null_std)

        results = {
            'alpha': self.alpha,
            'pvalue': pval,
            'test_stat': stat,
            'h0_rejected': pval < alpha,
            'time_secs': t.secs,
        }
        return results
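The p-value above comes from a normal approximation of the null distribution: the statistic is sqrt(n) times the estimated mean, and under H0 it is treated as N(0, var). Below is a minimal self-contained sketch of that computation, with made-up numbers standing in for the output of get_H1_mean_variance:

import numpy as np
from scipy import stats

n = 500                      # sample size
mean_h1, var = 0.012, 0.9    # assumed outputs of get_H1_mean_variance(dat)
stat = (n ** 0.5) * mean_h1  # test statistic, scaled by sqrt(n)
# one-sided p-value under the N(0, var) null approximation
pval = stats.norm.sf(stat, loc=0, scale=var ** 0.5)
print(stat, pval, pval < 0.01)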
Example No. 3
    def compute(self):

        P = self.P
        Q = self.Q
        data_source = self.data_source
        r = self.rep
        n = self.n
        met_func = self.met_func
        prob_label = self.prob_label

        logger.info("computing. %s. prob=%s, r=%d,\
                n=%d" % (met_func.__name__, prob_label, r, n))
        with util.ContextTimer() as t:
            job_result = met_func(P, Q, data_source, n, r)

            # create a SingleResult instance
            result = SingleResult(job_result)
            # submit the result to my own aggregator
            self.aggregator.submit_result(result)
            func_name = met_func.__name__

        logger.info("done. ex2: %s, prob=%s, r=%d, n=%d. Took: %.3g s " %
                    (func_name, prob_label, r, n, t.secs))

        # save result
        fname = '%s-%s-n%d_r%d_a%.3f.p' \
                %(prob_label, func_name, n, r, alpha )
        glo.ex_save_result(ex, job_result, prob_label, fname)
Example No. 4
def fid_permutation_test(X, Y, Z, alpha=0.01, n_permute=400, seed=893):
    assert X.shape == Y.shape
    assert X.shape == Z.shape
    XYZ = np.vstack([X, Y, Z])
    nxyz = XYZ.shape[0]
    nx = ny = X.shape[0]
    splits = 1
    split_size = X.shape[0]
    split_method = 'copy'
    split_args = {'splits': splits, 'n': split_size, 'split_method': split_method}

    with util.ContextTimer() as t:
        stat = np.mean(fid_score(X, Z, **split_args)) - np.mean(fid_score(Y, Z, **split_args))
        list_fid = np.zeros((n_permute))
        with util.NumpySeedContext(seed):
            for r in range(n_permute):
                ind = np.random.choice(nxyz, nxyz, replace=False)
                indx = ind[:nx]
                indy = ind[nx:nx+ny]
                indz = ind[nx+ny:]
                codes_p = XYZ[indx]
                codes_q = XYZ[indy]
                codes_r = XYZ[indz]
                fid_xz = np.mean(fid_score(codes_p, codes_r, **split_args))
                fid_yz = np.mean(fid_score(codes_q, codes_r, **split_args))
                list_fid[r] = fid_xz - fid_yz
    pvalue = np.mean(list_fid > stat)
    results = {'alpha': alpha, 'pvalue': pvalue, 'test_stat': stat,
               'h0_rejected': pvalue < alpha, 'n_permute': n_permute,
               'time_secs': t.secs,
               }
    return results
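fid_permutation_test follows the standard permutation-test recipe: pool the samples, recompute the statistic under random relabelings, and report the fraction of permuted statistics that exceed the observed one. The sketch below illustrates that recipe with a simple mean-difference statistic standing in for the FID difference; it is illustrative only and not the repository's code:

import numpy as np

def permutation_pvalue(X, Y, stat_fn, n_permute=400, seed=893):
    stat = stat_fn(X, Y)               # statistic on the original labeling
    XY = np.vstack([X, Y])
    n = X.shape[0]
    rng = np.random.RandomState(seed)
    perm_stats = np.zeros(n_permute)
    for r in range(n_permute):
        ind = rng.permutation(XY.shape[0])   # random relabeling of the pooled sample
        perm_stats[r] = stat_fn(XY[ind[:n]], XY[ind[n:]])
    return np.mean(perm_stats > stat)  # permutation p-value

# toy usage: difference of means as the statistic
X = np.random.randn(100, 5)
Y = np.random.randn(100, 5) + 0.3
pval = permutation_pvalue(X, Y, lambda A, B: np.linalg.norm(A.mean(0) - B.mean(0)))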
Example No. 5
    def grad_power_noise(x):
        """
        Compute the gradient of the power criterion with respect to the width of Gaussian
        RBF kernel and the noise vector.

        Args:
            x: 1 + 2J*d_n vector
        Returns:
            the gradient of the power criterion with respect to kernel width/latent vector
        """

        with util.ContextTimer() as t:
            width, z = unflatten(x)
            zp = z[:J]
            zq = z[J:]

            # Compute the Jacobian of the generators with respect to noise vector
            torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1),
                                         requires_grad=True)
            torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1),
                                         requires_grad=True)
            gp_grad = compute_jacobian(torch_zp, gen_p(torch_zp).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            gq_grad = compute_jacobian(torch_zq, gen_q(torch_zq).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            v_grad_z = np.vstack([gp_grad, gq_grad])
            v_grad_z = np.squeeze(v_grad_z, [3, 4])  # 2J x d_pix x d_noise
            
            # Compute the Jacobian of the feature extractor with respect to noise vector
            vp_flatten = to_torch_variable(
                gen_p(torch_zp).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True
            )
            vq_flatten = to_torch_variable(
                gen_q(torch_zq).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True
            )
            size = (model_input_size, model_input_size)
            upsample = nn.Upsample(size=size, mode='bilinear')
            fp = model(upsample(vp_flatten))
            fq = model(upsample(vq_flatten))
            fp_grad = compute_jacobian(vp_flatten, fp.view(J, -1))  # J x d_nn x C x H x W
            fq_grad = compute_jacobian(vq_flatten, fq.view(J, -1))  # J x d_nn x C x H x W
            f_grad_v = np.vstack([fp_grad, fq_grad])
            f_grad_v = f_grad_v.reshape((2*J, f_grad_v.shape[1], -1))  # 2J x d_nn x d_pix

            # Compute the gradient of the objective function with respect to
            # the gaussian width and test locations
            F = np.vstack([fp.cpu().data.numpy(), fq.cpu().data.numpy()])
            F = np.reshape(F, (2*J, -1))
            grad_obj = autograd.elementwise_grad(flat_obj_feat)  # 1+(2J)*d_nn input
            obj_grad_f = grad_obj(flatten(width, F))
            obj_grad_width = obj_grad_f[0]
            obj_grad_f = np.reshape(obj_grad_f[1:], [(2*J), -1])  # 2J x d_nn array

            obj_grad_v = inner1d(obj_grad_f, np.transpose(f_grad_v, (2, 0, 1)))  # 2J x d_pix
            obj_grad_z = inner1d(obj_grad_v.T, np.transpose(v_grad_z, (2, 0, 1))).flatten()

        return np.concatenate([obj_grad_width.reshape([1]), obj_grad_z]) 
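inner1d (from numpy.core.umath_tests) contracts the last axis of two broadcast-compatible arrays; it has been removed from recent NumPy releases, and the same contraction can be written with einsum. A small sketch of the equivalence, assuming arrays shaped like the gradients above:

import numpy as np

obj_grad_f = np.random.randn(6, 8)       # e.g. 2J x d_nn
f_grad_v_T = np.random.randn(10, 6, 8)   # e.g. transposed Jacobian, d_pix x 2J x d_nn
# inner product over the trailing axis, broadcasting over the leading ones;
# equivalent to inner1d(obj_grad_f, f_grad_v_T)
obj_grad_v = np.einsum('...i,...i->...', obj_grad_f, f_grad_v_T)  # shape (10, 6)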
Example No. 6
def run_optimization(args, gp, gq, img_data, model_name, J=10):
    """
    Wrapper for noise space optimization

    """

    model = load_pretrained_model(model_name)
    model.eval()
    if model_name == 'inceptionv3':
        feat_func = model.pool3
    else:
        feat_func = model.features

    sample_size = args.sample_size  # number of images we want to generate
    samples_p = sample_images(gp, sample_size)
    datap = go.extract_feats(samples_p, feat_func, upsample=True)

    samples_q = sample_images(gq, sample_size)
    dataq = go.extract_feats(samples_q, feat_func, upsample=True)

    ind = util.subsample_ind(img_data.shape[0], sample_size)
    datar = img_data[ind]
    datar = go.extract_feats(datar.transpose((0, 3, 1, 2)),
                             feat_func,
                             upsample=True)
    datap = data.Data(datap)
    dataq = data.Data(dataq)
    datar = data.Data(datar)

    Zp0 = np.random.uniform(-1, 1, (J, gp.z_size))
    Zq0 = np.random.uniform(-1, 1, (J, gq.z_size))
    XYZ = np.vstack((datap.data(), dataq.data(), datar.data()))
    med2 = util.meddistance(XYZ, subsample=1000)**2

    if args.exp == 2:
        gp = gq

    with util.ContextTimer() as t:
        Z_opt, gw_opt, opt_result = go.optimize_3sample_criterion(datap,
                                                                  dataq,
                                                                  datar,
                                                                  gp,
                                                                  gq,
                                                                  feat_func,
                                                                  Zp0,
                                                                  Zq0,
                                                                  gwidth0=med2)

    results = {}
    results['Z'] = Z_opt
    results['width'] = gw_opt
    results['opt'] = opt_result
    results['t'] = t
    results['ind'] = ind

    return results
Example No. 7
def met_gmmd_med(P, Q, data_source, n, r):
    """
    Use met_gmmd_med_bounliphone(). It uses the median heuristic following
    Bounliphone et al., 2016.

    Bounliphone et al., 2016's MMD-based 3-sample test.
    * Gaussian kernel.
    * Gaussian width = mean of (median heuristic on (X, Z), median heuristic
        on (Y, Z))
    * Use the full sample for testing (no holding out for optimization)
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        X, Y, Z = datp.data(), datq.data(), datr.data()

        # hyperparameters of the test
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Y, Z)), subsample=1000)
        medxyz = np.mean([medxz, medyz])
        k = kernel.KGauss(sigma2=medxyz**2)

        scmmd = mct.SC_MMD(datp, datq, k, alpha=alpha)
        scmmd_result = scmmd.perform_test(datr)

    return {
        # This key "test" can be removed.
        #'test': scmmd,
        'test_result': scmmd_result,
        'time_secs': t.secs
    }
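util.meddistance implements the median heuristic: take the median of the pairwise Euclidean distances on (a subsample of) the data and use its square as the Gaussian kernel width sigma2. A simplified stand-alone sketch of the idea (not the library function itself):

import numpy as np
from scipy.spatial.distance import pdist

def median_heuristic_sigma2(X, subsample=1000, seed=0):
    # subsample rows to keep the O(n^2) pairwise-distance computation cheap
    if X.shape[0] > subsample:
        rng = np.random.RandomState(seed)
        X = X[rng.choice(X.shape[0], subsample, replace=False)]
    med = np.median(pdist(X))   # median pairwise Euclidean distance
    return med ** 2             # Gaussian width sigma^2

sigma2 = median_heuristic_sigma2(np.random.randn(2000, 10))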
Example No. 8
def met_kid(mix_ratios, data_loader, n, r):
    """
    Compute MMD with the KID kernel. Note that this is not a test.
    """
    sample_size = [n] * 3
    X, Y, Z, _ = sample_data_mixing(mix_ratios, data_loader, sample_size, r)

    n_set = 100
    sub_size = 1000

    with util.ContextTimer() as t:
        kid_scores_xz = polynomial_mmd_averages(X,
                                                Z,
                                                degree=3,
                                                gamma=None,
                                                coef0=1,
                                                ret_var=False,
                                                n_subsets=n_set,
                                                subset_size=sub_size)

        kid_scores_yz = polynomial_mmd_averages(Y,
                                                Z,
                                                degree=3,
                                                gamma=None,
                                                coef0=1,
                                                ret_var=False,
                                                n_subsets=n_set,
                                                subset_size=sub_size)

        kid_score_xz = np.mean(kid_scores_xz)
        kid_score_yz = np.mean(kid_scores_yz)

    result = {
        'n_set': n_set,
        'sub_size': sub_size,
        'score_xz': kid_score_xz,
        'score_yz': kid_score_yz,
        'time_secs': t.secs,
        'sample_size': n,
        'rep': r,
        'method': 'kid'
    }
    return result
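KID averages unbiased MMD^2 estimates computed with the degree-3 polynomial kernel k(x, y) = (x.y/d + 1)^3 over n_set random subsets of size sub_size. The sketch below shows a single unbiased estimate with that kernel; it illustrates what polynomial_mmd_averages averages and is not the library routine itself:

import numpy as np

def polynomial_mmd2_unbiased(X, Y, degree=3, coef0=1):
    # KID kernel: k(x, y) = (x.y / d + coef0)^degree, i.e. gamma = 1/d
    d = X.shape[1]
    Kxx = (X @ X.T / d + coef0) ** degree
    Kyy = (Y @ Y.T / d + coef0) ** degree
    Kxy = (X @ Y.T / d + coef0) ** degree
    m, n = X.shape[0], Y.shape[0]
    # unbiased estimator: exclude the diagonal terms of Kxx and Kyy
    return ((Kxx.sum() - np.trace(Kxx)) / (m * (m - 1))
            + (Kyy.sum() - np.trace(Kyy)) / (n * (n - 1))
            - 2 * Kxy.mean())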
Example No. 9
def met_gmmd_med_bounliphone(P, Q, data_source, n, r):
    """
    Bounliphone et al., 2016's MMD-based 3-sample test.
    * Gaussian kernel. 
    * Gaussian width = chosen as described in https://github.com/wbounliphone/relative_similarity_test/blob/4884786aa3fe0f41b3ee76c9587de535a6294aee/relativeSimilarityTest_finalversion.m 
    * Use the full sample for testing (no holding out for optimization)
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        X, Y, Z = datp.data(), datq.data(), datr.data()

        med2 = mct.SC_MMD.median_heuristic_bounliphone(X,
                                                       Y,
                                                       Z,
                                                       subsample=1000,
                                                       seed=r + 3)
        k = kernel.KGauss(sigma2=med2)

        scmmd = mct.SC_MMD(datp, datq, k, alpha=alpha)
        scmmd_result = scmmd.perform_test(datr)

    return {
        # This key "test" can be removed.
        # 'test': scmmd,
        'test_result': scmmd_result,
        'time_secs': t.secs
    }
Example No. 10
def met_fid(mix_ratios, data_loader, n, r):
    """
    Compute the FIDs FID(P, R) and FID(Q, R).
    The bootstrap estimator from Binkowski et al. 2018 is used.
    The number of bootstrap resamplings can be specified by the variable
    splits below. For the non-bootstrap version, see the method
    met_fid_nbstrp.
    """
    sample_size = [n] * 3
    X, Y, Z, _ = sample_data_mixing(mix_ratios, data_loader, sample_size, r)

    # keeping it the same as the comparison in the MMD GAN paper: 10 bootstrap resamplings
    splits = 10
    split_size = X.shape[0]
    assert X.shape == Y.shape
    assert X.shape == Z.shape
    split_method = 'bootstrap'
    split_args = {
        'splits': splits,
        'n': split_size,
        'split_method': split_method
    }

    with util.ContextTimer() as t:
        fid_scores_xz = fid_score(X, Z, **split_args)
        fid_scores_yz = fid_score(Y, Z, **split_args)

        fid_score_xz = np.mean(fid_scores_xz)
        fid_score_yz = np.mean(fid_scores_yz)

    result = {
        'splits': splits,
        'sample_size': split_size,
        'score_xz': fid_score_xz,
        'score_yz': fid_score_yz,
        'time_secs': t.secs,
        'method': 'fid'
    }

    return result
Example No. 11
    def perform_test(self, dat):
        """
        :param dat: an instance of kmod.data.Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            X = dat.data()
            n = X.shape[0]
            #mean and variance are not yet scaled by \sqrt{n}
            mean, var = self.get_H1_mean_variance(dat)
            stat = (n**0.5) * mean
            # Assume the mean of the null distribution is 0
            pval = stats.norm.sf(stat, loc=0, scale=var**0.5)

        results = {
            'alpha': self.alpha,
            'pvalue': pval,
            'test_stat': stat,
            'h0_rejected': pval < alpha,
            'time_secs': t.secs,
        }
        return results
Example No. 12
def met_fid_nbstrp(mix_ratios, data_loader, n, r):
    """
    Compute the FIDs FID(P, R) and FID(Q, R).
    Unlike met_fid, the estimator is constructed by plugging the sample means and
    the sample covariances into the definition of FID.
    """
    sample_size = [n] * 3
    X, Y, Z, _ = sample_data_mixing(mix_ratios, data_loader, sample_size, r)

    # non-bootstrap version: a single split ('copy') that uses the full sample once
    splits = 1
    split_size = X.shape[0]
    assert X.shape == Y.shape
    assert X.shape == Z.shape
    split_method = 'copy'
    split_args = {
        'splits': splits,
        'n': split_size,
        'split_method': split_method
    }

    with util.ContextTimer() as t:
        fid_scores_xz = fid_score(X, Z, **split_args)
        fid_scores_yz = fid_score(Y, Z, **split_args)

        fid_score_xz = np.mean(fid_scores_xz)
        fid_score_yz = np.mean(fid_scores_yz)

    result = {
        'splits': splits,
        'sample_size': split_size,
        'score_xz': fid_score_xz,
        'score_yz': fid_score_yz,
        'time_secs': t.secs,
        'method': 'fid'
    }

    return result
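The plug-in estimator referred to in the docstring is the FID definition itself, FID = ||mu_x - mu_y||^2 + Tr(Cx + Cy - 2 (Cx Cy)^{1/2}), evaluated with sample means and covariances. A short sketch of that computation (illustrative; the repository's fid_score additionally handles the split/bootstrap logic):

import numpy as np
from scipy import linalg

def fid_plugin(X, Y):
    mu_x, mu_y = X.mean(axis=0), Y.mean(axis=0)
    cov_x = np.cov(X, rowvar=False)
    cov_y = np.cov(Y, rowvar=False)
    covmean = linalg.sqrtm(cov_x.dot(cov_y))   # matrix square root of Cx Cy
    if np.iscomplexobj(covmean):               # drop tiny imaginary parts
        covmean = covmean.real
    diff = mu_x - mu_y
    return diff.dot(diff) + np.trace(cov_x + cov_y - 2.0 * covmean)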
Example No. 13
    def perform_test(self, dat):
        """perform the model comparison test and return values computed in a
        dictionary: 
        {
            alpha: 0.01,
            pvalue: 0.0002,
            test_stat: 2.3,
            h0_rejected: True,
            time_secs: ...
        }

        :param dat: an instance of kmod.data.Data
        """
        with util.ContextTimer() as t:
            alpha = self.alpha
            X = dat.data()
            n = X.shape[0]
            # mean and variance are not yet scaled by \sqrt{n}
            # The variance is the same for both H0 and H1.
            mean_h1, var = self.get_H1_mean_variance(dat)
            if not util.is_real_num(var) or var < 0:
                log.l().warning('Invalid H0 variance. Was {}'.format(var))
            stat = (n**0.5) * mean_h1
            # Assume the mean of the null distribution is 0
            pval = stats.norm.sf(stat, loc=0, scale=var**0.5)
            if not util.is_real_num(pval):
                log.l().warning(
                    'p-value is not a real number. Was {}'.format(pval))

        results = {
            'alpha': self.alpha,
            'pvalue': pval,
            'test_stat': stat,
            'h0_rejected': pval < alpha,
            'time_secs': t.secs,
        }
        return results
Example No. 14
def optimize_3sample_criterion(datap,
                               dataq,
                               datar,
                               gen_p,
                               gen_q,
                               model,
                               Zp0,
                               Zq0,
                               gwidth0,
                               reg=1e-3,
                               max_iter=100,
                               tol_fun=1e-6,
                               disp=False,
                               locs_bounds_frac=100,
                               gwidth_lb=None,
                               gwidth_ub=None):
    """
    Similar to optimize_2sets_locs_widths() but constrain V=W and
    constrain the two kernels to be the same Gaussian kernel.
    Optimize one set of test locations and one Gaussian kernel width by
    maximizing the test power criterion of the UME *three*-sample test

    This optimization function is deterministic.

    Args:
        - datap: a kgof.data.Data from P (model 1)
        - dataq: a kgof.data.Data from Q (model 2)
        - datar: a kgof.data.Data from R (data generating distribution)
        - gen_p: pytorch model representing the generator p (model 1)
        - gen_q: pytorch model representing the generator q (model 2)
        - Zp0: Jxd_n numpy array. Initial value for the noise vectors of J locations.
           This is for model 1. 
        - Zq0: Jxd_n numpy array. Initial value for the noise vectors of J locations.
           This is for model 2.
        - model: a feature extractor applied to generated images 
        - gwidth0: initial value of the Gaussian width^2 for both UME(P, R),
              and UME(Q, R)
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
              the box defined by coordinate-wise min-max by std of each
              coordinate (of the aggregated data) multiplied by this number.
        - gwidth_lb: absolute lower bound on both the Gaussian width^2
        - gwidth_ub: absolute upper bound on both the Gaussian width^2

        If the lb, ub bounds are None, use fraction of the median heuristics
            to automatically set the bounds.
    Returns:
        - Z_opt: optimized noise vectors Z
        - gw_opt: optimized Gaussian width^2
        - opt_result: info from the optimization
    """
    J, dn = Zp0.shape
    Z0 = np.vstack([Zp0, Zq0])

    X, Y, Z = datap.data(), dataq.data(), datar.data()
    n, dp = X.shape

    global image_size

    def flatten(gwidth, V):
        return np.hstack((gwidth, V.reshape(-1)))

    def unflatten(x):
        sqrt_gwidth = x[0]
        V = np.reshape(x[1:], (2 * J, -1))
        return sqrt_gwidth, V

    # Parameterize the Gaussian width with its square root (then square later)
    # to automatically enforce the positivity.
    def obj_feat_space(sqrt_gwidth, V):
        k = kernel.KGauss(sqrt_gwidth**2)
        return -SC_UME.power_criterion(
            datap, dataq, datar, k, k, V, V, reg=reg)

    def flat_obj_feat(x):
        sqrt_gwidth, V = unflatten(x)
        return obj_feat_space(sqrt_gwidth, V)

    def obj_noise_space(sqrt_gwidth, z):
        zp = z[:J]
        zq = z[J:]
        torch_zp = to_torch_variable(zp, shape=(-1, zp.shape[1], 1, 1))
        torch_zq = to_torch_variable(zq, shape=(-1, zq.shape[1], 1, 1))
        # need preprocessing probably
        global model_input_size
        s = model_input_size
        upsample = nn.Upsample(size=(s, s), mode='bilinear')
        fp = model(upsample(gen_p(torch_zp))).cpu().data.numpy()
        fp = fp.reshape((J, -1))
        fq = model(upsample(gen_q(torch_zq))).cpu().data.numpy()
        fq = fq.reshape((J, -1))
        F = np.vstack([fp, fq])
        return obj_feat_space(sqrt_gwidth, F)

    def flat_obj_noise(x):
        sqrt_gwidth, z = unflatten(x)
        return obj_noise_space(sqrt_gwidth, z)

    def grad_power_noise(x):
        """
        Compute the gradient of the power criterion with respect to the width of Gaussian
        RBF kernel and the noise vector.

        Args:
            x: 1 + 2J*d_n vector
        Returns:
            the gradient of the power criterion with respect to kernel width/latent vector
        """

        with util.ContextTimer() as t:
            width, z = unflatten(x)
            zp = z[:J]
            zq = z[J:]

            # Compute the Jacobian of the generators with respect to noise vector
            torch_zp = to_torch_variable(zp,
                                         shape=(-1, zp.shape[1], 1, 1),
                                         requires_grad=True)
            torch_zq = to_torch_variable(zq,
                                         shape=(-1, zq.shape[1], 1, 1),
                                         requires_grad=True)
            gp_grad = compute_jacobian(
                torch_zp,
                gen_p(torch_zp).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            gq_grad = compute_jacobian(
                torch_zq,
                gen_q(torch_zq).view(J, -1))  # J x d_pix x d_noise x 1 x 1
            v_grad_z = np.vstack([gp_grad, gq_grad])
            v_grad_z = np.squeeze(v_grad_z, [3, 4])  # 2J x d_pix x d_noise

            # Compute the Jacobian of the feature extractor with respect to noise vector
            vp_flatten = to_torch_variable(
                gen_p(torch_zp).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True)
            vq_flatten = to_torch_variable(
                gen_q(torch_zq).view(J, -1).cpu().data.numpy(),
                shape=(J, 3, image_size, image_size),
                requires_grad=True)
            size = (model_input_size, model_input_size)
            upsample = nn.Upsample(size=size, mode='bilinear')
            fp = model(upsample(vp_flatten))
            fq = model(upsample(vq_flatten))
            fp_grad = compute_jacobian(vp_flatten,
                                       fp.view(J, -1))  # J x d_nn x C x H x W
            fq_grad = compute_jacobian(vq_flatten,
                                       fq.view(J, -1))  # J x d_nn x C x H x W
            f_grad_v = np.vstack([fp_grad, fq_grad])
            f_grad_v = f_grad_v.reshape(
                (2 * J, f_grad_v.shape[1], -1))  # 2J x d_nn x d_pix

            # Compute the gradient of the objective function with respect to
            # the gaussian width and test locations
            F = np.vstack([fp.cpu().data.numpy(), fq.cpu().data.numpy()])
            F = np.reshape(F, (2 * J, -1))
            grad_obj = autograd.elementwise_grad(
                flat_obj_feat)  # 1+(2J)*d_nn input
            obj_grad_f = grad_obj(flatten(width, F))
            obj_grad_width = obj_grad_f[0]
            obj_grad_f = np.reshape(obj_grad_f[1:],
                                    [(2 * J), -1])  # 2J x d_nn array

            obj_grad_v = inner1d(obj_grad_f,
                                 np.transpose(f_grad_v,
                                              (2, 0, 1)))  # 2J x d_pix
            obj_grad_z = inner1d(obj_grad_v.T,
                                 np.transpose(v_grad_z, (2, 0, 1))).flatten()

        return np.concatenate([obj_grad_width.reshape([1]), obj_grad_z])

    # Initial point
    x0 = flatten(np.sqrt(gwidth0), Z0)

    # make sure that the optimized gwidth is not too small or too large.
    XYZ = np.vstack((X, Y, Z))
    med2 = util.meddistance(XYZ, subsample=1000)**2
    fac_min = 1e-2
    fac_max = 1e2
    if gwidth_lb is None:
        gwidth_lb = max(fac_min * med2, 1e-3)
    if gwidth_ub is None:
        gwidth_ub = min(fac_max * med2, 1e5)

    # # Make a box to bound test locations
    # XYZ_std = np.std(XYZ, axis=0)
    # # XYZ_min: length-d array
    # XYZ_min = np.min(XYZ, axis=0)
    # XYZ_max = np.max(XYZ, axis=0)
    # # V_lb: 2J x dn
    # V_lb = np.tile(XYZ_min - locs_bounds_frac*XYZ_std, (2*J, 1))
    # V_ub = np.tile(XYZ_max + locs_bounds_frac*XYZ_std, (2*J, 1))
    # # (J*d+1) x 2. Take square root because we parameterize with the square
    # # root
    # x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
    # x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
    # #x0_bounds = list(zip(x0_lb, x0_ub))

    # Assuming the noise comes from a uniform distribution over the unit cube
    x0_bounds = [(gwidth_lb, gwidth_ub)] + [(-1, 1)] * (2 * J * dn)

    # optimize. Time the optimization as well.
    # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
    with util.ContextTimer() as timer:
        opt_result = scipy.optimize.minimize(flat_obj_noise,
                                             x0,
                                             method='L-BFGS-B',
                                             bounds=x0_bounds,
                                             tol=tol_fun,
                                             options={
                                                 'maxiter': max_iter,
                                                 'ftol': tol_fun,
                                                 'disp': disp,
                                                 'gtol': 1.0e-08,
                                             },
                                             jac=grad_power_noise)

    opt_result = dict(opt_result)
    opt_result['time_secs'] = timer.secs
    x_opt = opt_result['x']
    sq_gw_opt, Z_opt = unflatten(x_opt)
    gw_opt = sq_gw_opt**2

    assert util.is_real_num(gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)
    return Z_opt, gw_opt, opt_result
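The optimizer above packs sqrt(gwidth) and the flattened noise vectors into one parameter vector, box-bounds it, and passes an analytic gradient to L-BFGS-B; squaring the first coordinate afterwards keeps the width positive. A toy sketch of the same calling pattern with a quadratic objective (purely illustrative):

import numpy as np
import scipy.optimize

def flat_obj(x):
    # x[0] plays the role of sqrt(gwidth); the rest mimic the noise coordinates
    return (x[0] - 2.0) ** 2 + np.sum(x[1:] ** 2)

def flat_grad(x):
    g = 2.0 * x
    g[0] = 2.0 * (x[0] - 2.0)
    return g

x0 = np.concatenate([[1.0], np.zeros(4)])
bounds = [(0.1, 10.0)] + [(-1, 1)] * 4   # width bound plus the unit noise cube
res = scipy.optimize.minimize(flat_obj, x0, method='L-BFGS-B', bounds=bounds,
                              jac=flat_grad,
                              options={'maxiter': 100, 'ftol': 1e-6})
gw_opt = res.x[0] ** 2   # square back to recover the optimized width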
Example No. 15
def met_gumeJ1_2V_rand(P, Q, data_source, n, r, J=1, use_1set_locs=False):
    """
    UME-based three-sample test. 
        * Use J=1 test location by default. 
        * Use two sets (2V) of test locations by default: V and W, each having J
            locations.  Will constrain V=W if use_1set_locs=True.
        * The test locations are selected at random from the data. The selected
            points are then removed from the sample used for testing.
        * Gaussian kernels for the two UME statistics. Median heuristic is used
            to select each width.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:

        # remove the first J points from each set
        X, Y, Z = datp.data(), datq.data(), datr.data()

        # containing 3*J points
        pool3J = np.vstack((X[:J, :], Y[:J, :], Z[:J, :]))
        X, Y, Z = (X[J:, :], Y[J:, :], Z[J:, :])

        datp, datq, datr = [data.Data(a) for a in [X, Y, Z]]
        assert X.shape[0] == Y.shape[0]
        assert Y.shape[0] == Z.shape[0]
        assert Z.shape[0] == n - J
        assert datp.sample_size() == n - J
        assert datq.sample_size() == n - J
        assert datr.sample_size() == n - J

        #XYZ = np.vstack((X, Y, Z))
        #stds = np.std(util.subsample_rows(XYZ, min(n-3*J, 500),
        #    seed=r+87), axis=0)
        d = X.shape[1]
        # draw random test locations (replacing pool3J; the add-noise variant is commented out)
        with util.NumpySeedContext(seed=r * 191):
            #pool3J = pool3J + np.random.randn(3*J, d)*np.max(stds)*3
            pool3J = np.random.randn(3 * J, d) * 2

        # median heuristic to set the Gaussian widths
        medxz = util.meddistance(np.vstack((X, Z)), subsample=1000)
        medyz = util.meddistance(np.vstack((Z, Y)), subsample=1000)
        if use_1set_locs:
            # randomly select J points from the pool3J for the J test locations
            #V = util.subsample_rows(pool3J, J, r)
            V = pool3J[:J, :]
            W = V
            k = kernel.KGauss(sigma2=np.mean([medxz, medyz])**2)
            l = k
        else:
            # use two sets of locations: V and W
            #VW = util.subsample_rows(pool3J, 2*J, r)
            VW = pool3J[:2 * J, :]
            V = VW[:J, :]
            W = VW[J:, :]

            # 2 Gaussian kernels
            k = kernel.KGauss(sigma2=medxz**2)
            l = kernel.KGauss(sigma2=medyz**2)

        # construct the test
        scume = mct.SC_UME(datp, datq, k, l, V, W, alpha=alpha)
        scume_rand_result = scume.perform_test(datr)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':scume,
        'test_result': scume_rand_result,
        'time_secs': t.secs
    }
Example No. 16
def met_gumeJ1_3sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    UME-based three-sample test
        * Use J=1 test location by default (in the set V=W). 
        * 3sopt = optimize the test locations by maximizing the 3-sample test's
        power criterion. There is only one set of test locations.
        * One Gaussian kernel for the two UME statistics. Optimize the Gaussian width
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=tr_proportion, seed=r) for D in [datp, datq, datr]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]
        Xyztr = np.vstack((Xtr, Ytr, Ztr))
        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ztr, Ytr)), subsample=1000)
        gwidth0 = np.mean([medxz, medyz])**2

        # pick a subset of points in the training set for V, W
        V0 = util.subsample_rows(Xyztr, J, seed=r + 2)

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-6,
            'tol_fun': 1e-7,
            'locs_bounds_frac': 50,
            'gwidth_lb': 0.1,
            'gwidth_ub': 6**2,
        }
        V_opt, gw2_opt, opt_result = mct.SC_GaussUME.optimize_3sample_criterion(
            datptr, datqtr, datrtr, V0, gwidth0, **opt_options)
        k_opt = kernel.KGauss(gw2_opt)

        # construct a UME test
        scume_opt3 = mct.SC_UME(datpte,
                                datqte,
                                k_opt,
                                k_opt,
                                V_opt,
                                V_opt,
                                alpha=alpha)
        scume_opt3_result = scume_opt3.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':scume,
        'test_result': scume_opt3_result,
        'time_secs': t.secs
    }
Example No. 17
def met_gumeJ1_2sopt_tr50(P, Q, data_source, n, r, J=1, tr_proportion=0.5):
    """
    UME-based three-sample test
        * Use J=1 test location by default. 
        * 2sopt = optimize the two sets of test locations by maximizing the
            2-sample test's power criterion. Each set is optimized separately.
        * Gaussian kernels for the two UME statistics. The Gaussian widths are
        also optimized separately.
    """
    if not P.has_datasource() or not Q.has_datasource():
        # Not applicable. Return {}.
        return {}
    assert J >= 1

    ds_p = P.get_datasource()
    ds_q = Q.get_datasource()
    # sample some data
    datp, datq, datr = sample_pqr(ds_p,
                                  ds_q,
                                  data_source,
                                  n,
                                  r,
                                  only_from_r=False)

    # Start the timer here
    with util.ContextTimer() as t:
        # split the data into training/test sets
        [(datptr, datpte), (datqtr, datqte), (datrtr, datrte)] = \
            [D.split_tr_te(tr_proportion=tr_proportion, seed=r) for D in [datp, datq, datr]]
        Xtr, Ytr, Ztr = [D.data() for D in [datptr, datqtr, datrtr]]

        # initialize optimization parameters.
        # Initialize the Gaussian widths with the median heuristic
        medxz = util.meddistance(np.vstack((Xtr, Ztr)), subsample=1000)
        medyz = util.meddistance(np.vstack((Ytr, Ztr)), subsample=1000)
        gwidth0p = medxz**2
        gwidth0q = medyz**2

        # numbers of test locations in V, W
        Jp = J
        Jq = J

        # pick a subset of points in the training set for V, W
        Xyztr = np.vstack((Xtr, Ytr, Ztr))
        VW = util.subsample_rows(Xyztr, Jp + Jq, seed=r + 1)
        V0 = VW[:Jp, :]
        W0 = VW[Jp:, :]

        # optimization options
        opt_options = {
            'max_iter': 100,
            'reg': 1e-4,
            'tol_fun': 1e-6,
            'locs_bounds_frac': 50,
            'gwidth_lb': 0.1,
            'gwidth_ub': 10**2,
        }

        umep_params, umeq_params = mct.SC_GaussUME.optimize_2sets_locs_widths(
            datptr, datqtr, datrtr, V0, W0, gwidth0p, gwidth0q, **opt_options)
        (V_opt, gw2p_opt, opt_infop) = umep_params
        (W_opt, gw2q_opt, opt_infoq) = umeq_params
        k_opt = kernel.KGauss(gw2p_opt)
        l_opt = kernel.KGauss(gw2q_opt)

        # construct a UME test
        scume_opt2 = mct.SC_UME(datpte,
                                datqte,
                                k_opt,
                                l_opt,
                                V_opt,
                                W_opt,
                                alpha=alpha)
        scume_opt2_result = scume_opt2.perform_test(datrte)

    return {
        # This key "test" can be removed. Storing V, W can take quite a lot
        # of space, especially when the input dimension d is high.
        #'test':scume,
        'test_result': scume_opt2_result,
        'time_secs': t.secs
    }
Example No. 18
    def optimize_3sample_criterion(datap,
                                   dataq,
                                   datar,
                                   V0,
                                   gwidth0,
                                   reg=1e-3,
                                   max_iter=100,
                                   tol_fun=1e-6,
                                   disp=False,
                                   locs_bounds_frac=100,
                                   gwidth_lb=None,
                                   gwidth_ub=None):
        """
        Similar to optimize_2sets_locs_widths() but constrain V=W, and
        constrain the two Gaussian widths to be the same.
        Optimize one set of test locations and one Gaussian kernel width by
        maximizing the test power criterion of the UME *three*-sample test             

        This optimization function is deterministic.

        - datap: a kgof.data.Data from P (model 1)
        - dataq: a kgof.data.Data from Q (model 2)
        - datar: a kgof.data.Data from R (data generating distribution)
        - V0: Jxd numpy array. Initial V containing J locations. For both
              UME(P, R) and UME(Q, R)
        - gwidth0: initial value of the Gaussian width^2 for both UME(P, R),
              and UME(Q, R)
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
              the box defined by coordinate-wise min-max by std of each
              coordinate (of the aggregated data) multiplied by this number.
        - gwidth_lb: absolute lower bound on both the Gaussian width^2
        - gwidth_ub: absolute upper bound on both the Gaussian width^2

        If the lb, ub bounds are None, use fraction of the median heuristics 
            to automatically set the bounds.
        
        Return (optimized V, optimized Gaussian width^2, info from the optimization)
        """
        J = V0.shape[0]
        X, Y, Z = datap.data(), dataq.data(), datar.data()
        n, d = X.shape

        # Parameterize the Gaussian width with its square root (then square later)
        # to automatically enforce the positivity.
        def obj(sqrt_gwidth, V):
            k = kernel.KGauss(sqrt_gwidth**2)
            return -SC_UME.power_criterion(
                datap, dataq, datar, k, k, V, V, reg=reg)

        flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

        def unflatten(x):
            sqrt_gwidth = x[0]
            V = np.reshape(x[1:], (J, d))
            return sqrt_gwidth, V

        def flat_obj(x):
            sqrt_gwidth, V = unflatten(x)
            return obj(sqrt_gwidth, V)

        # Initial point
        x0 = flatten(np.sqrt(gwidth0), V0)

        #make sure that the optimized gwidth is not too small or too large.
        XYZ = np.vstack((X, Y, Z))
        med2 = util.meddistance(XYZ, subsample=1000)**2
        fac_min = 1e-2
        fac_max = 1e2
        if gwidth_lb is None:
            gwidth_lb = max(fac_min * med2, 1e-2)
        if gwidth_ub is None:
            gwidth_ub = min(fac_max * med2, 1e5)

        # Make a box to bound test locations
        XYZ_std = np.std(XYZ, axis=0)
        # XYZ_min: length-d array
        XYZ_min = np.min(XYZ, axis=0)
        XYZ_max = np.max(XYZ, axis=0)
        # V_lb: J x d
        V_lb = np.tile(XYZ_min - locs_bounds_frac * XYZ_std, (J, 1))
        V_ub = np.tile(XYZ_max + locs_bounds_frac * XYZ_std, (J, 1))
        # (J*d+1) x 2. Take square root because we parameterize with the square
        # root
        x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-08,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sq_gw_opt, V_opt = unflatten(x_opt)
        gw_opt = sq_gw_opt**2

        assert util.is_real_num(
            gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)
        return V_opt, gw_opt, opt_result
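Here the gradient handed to L-BFGS-B is produced automatically by autograd (for a scalar objective, elementwise_grad reduces to the ordinary gradient) rather than derived by hand. A minimal sketch of that pattern on a toy objective, assuming the autograd package is available:

import autograd.numpy as np
from autograd import elementwise_grad
import scipy.optimize

def flat_obj(x):
    # toy stand-in for the (negative) power criterion
    return np.sum((x - 1.5) ** 2) + 0.1 * np.sum(np.sin(x))

grad_obj = elementwise_grad(flat_obj)   # automatic gradient of the flat objective
x0 = np.zeros(5)
res = scipy.optimize.minimize(flat_obj, x0, method='L-BFGS-B', jac=grad_obj,
                              options={'maxiter': 100, 'ftol': 1e-6})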