示例#1
0
def estimate_H(X, Y, estimator, B, ratio, discrete_output = None):
    """Estimate H column by column with a block or incomplete U-statistics estimator.

    :param X: covariate array, indexed as X[:, i] for each of its X.shape[1] columns
    :param Y: response array; must have the same length along axis 0 as X
    :param estimator: 'inc' selects hsic.HSIC_Inc, any other value selects
        hsic.HSIC_Block
    :param B: block size (only used by the block estimator)
    :param ratio: size of incomplete U-statistics estimator (only used by 'inc')
    :param discrete_output: None to put a Gaussian kernel on Y, otherwise a pair
        of frequency dictionaries that is forwarded to KDiscrete
    :return: tuple (H_estimates, H, m) where H_estimates is the (p, m) array of
        per-column estimates, H is its mean along axis 1 and m is the number
        of estimates stored per column
    """
    assert Y.shape[0] == X.shape[0]
    n_samples, n_features = X.shape[0], X.shape[1]

    # Gaussian kernel on X; the bandwidth is the squared util.meddistance value.
    kx = kernel.KGauss(util.meddistance(X, subsample=1000) ** 2)
    if discrete_output is None:
        ky = kernel.KGauss(util.meddistance(Y[:, np.newaxis], subsample=1000) ** 2)
    else:
        first_freqs, second_freqs = discrete_output
        ky = KDiscrete(first_freqs, second_freqs)

    # m is the number of estimates each column is expected to produce.
    if estimator == 'inc':
        hsic_H = hsic.HSIC_Inc(kx, ky, ratio=ratio)
        m = int(n_samples * ratio)
    else:  # 'block'
        hsic_H = hsic.HSIC_Block(kx, ky, bsize=B)
        m = int(np.floor(n_samples / B))

    H_estimates = np.zeros((n_features, m))
    response = Y[:, np.newaxis]
    for idx in range(n_features):
        feature = X[:, idx, np.newaxis]
        # Flatten whatever shape the estimator returns into one row of length m.
        H_estimates[idx, :] = np.reshape(hsic_H.estimates(feature, response), -1)
    return H_estimates, np.mean(H_estimates, axis=1), m
示例#2
0
 def sel_inf(self, X, Y, inf_type, alpha, niv, H0 = None, M0 = None, i = None):
     """Run post-selection inference and package the outcome for reporting.

     :param X, Y: covariate and response data
     :param inf_type: must be 'test'; any other value fails the assertion
     :param alpha: level 1-alpha, forwarded to the selection test
     :param niv: number of important variables, used for reporting results;
         selected variables with index >= niv are cleared from the 'true'
         rejection indicator
     H0, M0 and i are not used
     :return: sim.Inference_Result built from the selection indicator, the
         rejection indicators and the p-values (None when self.poly is set)
     """
     assert inf_type == 'test'
     n_features = X.shape[1]

     # Kernels: Gaussian on X; discrete or Gaussian on Y.
     kx = kernel.KGauss(util.meddistance(X, subsample=1000) ** 2)
     if self.discrete_output:
         ky = KDiscrete()
     else:
         ky = kernel.KGauss(util.meddistance(Y[:, np.newaxis], subsample=1000) ** 2)

     # HSIC estimator: incomplete U-statistic or block.
     if self.estimator == 'inc':
         hsic_H = hsic.HSIC_Inc(kx, ky, ratio=self.l)
     else:  # 'block'
         hsic_H = hsic.HSIC_Block(kx, ky, bsize=self.B)

     selector_cls = PolySel if self.poly else MultiSel  # else: multi
     feat_select = selector_cls(hsic_H)
     results = feat_select.test(X, Y[:, np.newaxis], args=self.n_select, alpha=alpha)

     # Reporting
     selected = results['sel_vars']
     rejected = results['h0_rejs']

     ind_sel_np = np.zeros(n_features)
     ind_sel_np[selected] = 1

     rejection_flags = np.zeros(n_features)
     rejection_flags[selected[rejected]] = 1

     # Same pattern as the selection indicator, restricted to the first niv variables.
     true_flags = ind_sel_np.copy()
     true_flags[niv:] = 0

     if self.poly:
         # p-values not provided for Poly
         p_values = None
     else:
         # -1 marks variables that were not selected.
         per_feature = np.full(n_features, -1.0)
         per_feature[selected] = results['pvals']
         p_values = {'H': per_feature}

     return sim.Inference_Result(n_features, None, ind_sel_np,
                                 {'H': rejection_flags},
                                 {'H': true_flags}, p_values, None)
示例#3
0
def estimate_M(X, estimator, B, ratio):
    """Estimate the symmetric matrix M with a block or incomplete U-statistics estimator.

    Entry (i, j) is the mean of the HSIC estimates computed between columns
    i and j of X, with the same Gaussian kernel used on both arguments.

    :param X: covariate array with X.shape[1] columns
    :param estimator: 'inc' selects hsic.HSIC_Inc, any other value selects
        hsic.HSIC_Block
    :param B: Block size (only used by the block estimator)
    :param ratio: size of incomplete U-statistics estimator (only used by 'inc')
    :return: tuple (M_true, M) where M_true is the raw symmetric estimate and
        M is the result of nearestPD(M_true)
    """
    n_features = X.shape[1]
    kx = kernel.KGauss(util.meddistance(X, subsample=1000) ** 2)
    if estimator == 'inc':
        hsic_M = hsic.HSIC_Inc(kx, kx, ratio=ratio)
    else:  # 'block'
        hsic_M = hsic.HSIC_Block(kx, kx, bsize=B)

    M_true = np.zeros((n_features, n_features))
    # Only the lower triangle (diagonal included) is evaluated; each value is
    # mirrored into the upper triangle.
    for row in range(n_features):
        row_feature = X[:, row, np.newaxis]
        for col in range(row + 1):
            col_feature = X[:, col, np.newaxis]
            value = np.mean(hsic_M.estimates(row_feature, col_feature))
            M_true[row, col] = value
            M_true[col, row] = value

    # positive definite approximation
    return M_true, nearestPD(M_true)
示例#4
0
    def sel_inf(self,
                X,
                Y,
                inf_type,
                alpha,
                niv,
                H0=None,
                M0=None,
                i=None,
                unbiased_parallel=False,
                n_jobs=20):
        """Run post-selection inference and package the outcome for reporting.

        :param X, Y: covariate and response data
        :param inf_type: must be 'test'; any other value fails the assertion
        :param alpha: level 1-alpha, forwarded to the selection test
        :param niv: number of important variables, used for reporting results;
            selected variables with index >= niv are cleared from the 'true'
            rejection indicator
        H0, M0, i, unbiased_parallel and n_jobs are not used
        :return: sim.Inference_Result built from the selection indicator, the
            rejection indicators and the p-values (-1 where none is reported)
        """
        assert inf_type == 'test'
        n_features = X.shape[1]

        # Kernels: Gaussian on X; frequency-based discrete or Gaussian on Y.
        kx = kernel.KGauss(util.meddistance(X, subsample=1000)**2)
        if self.discrete_output:
            # Map every distinct response value to its number of occurrences.
            freq_dict = dict(zip(*np.unique(Y, return_counts=True)))
            ky = KDiscrete(freq_dict, freq_dict)
        else:
            ky = kernel.KGauss(
                util.meddistance(Y[:, np.newaxis], subsample=1000)**2)

        # HSIC estimator: incomplete U-statistic or block.
        if self.estimator == 'inc':
            hsic_H = hsic.HSIC_Inc(kx, ky, ratio=self.l)
        else:  # 'block'
            hsic_H = hsic.HSIC_Block(kx, ky, bsize=self.B)

        selector_cls = PolySel if self.poly else MultiSel  # else: multi
        feat_select = selector_cls(hsic_H)

        response = Y[:, np.newaxis]

        # When evaluating power w.r.t. the first feature, the full test is
        # run only if feature 0 is among the n_select largest statistics.
        run_test = True
        if self.only_evaluate_first:
            scores = hsic_H.compute(X, response)
            top = np.argpartition(scores, -self.n_select,
                                  axis=0)[-self.n_select:]
            run_test = 0 in top

        if run_test:
            results = feat_select.test(X,
                                       response,
                                       args=self.n_select,
                                       alpha=alpha)
            selected = results['sel_vars']
            rejected = results['h0_rejs']
        else:
            # fake values
            selected = np.arange(n_features - self.n_select, n_features)
            rejected = np.array([self.n_select - 1])

        # Reporting
        ind_sel_np = np.zeros(n_features)
        ind_sel_np[selected] = 1

        rejection_flags = np.zeros(n_features)
        rejection_flags[selected[rejected]] = 1

        # Same pattern as the selection indicator, restricted to the first niv variables.
        true_flags = ind_sel_np.copy()
        true_flags[niv:] = 0

        # -1 marks entries for which no p-value is reported.
        per_feature = np.full(n_features, -1.0)
        # p-values not provided for Poly
        # p-values not of interest for evaluation of empirical power
        if not (self.poly or self.only_evaluate_first):
            per_feature[selected] = results['pvals']

        return sim.Inference_Result(n_features, None, ind_sel_np,
                                    {'H': rejection_flags},
                                    {'H': true_flags},
                                    {'H': per_feature}, None)