Пример #1
0
def Cv_Leave_One_Curve(x, y, x0, candidate_h, ker_fun='Gaussian'):
    """
    Pick a bandwidth by leave-one-curve-out cross-validation.

    For every candidate bandwidth, each curve is removed in turn from the
    binned data, a fit on the grid ``x0`` is obtained from the remaining
    curves through ``lpr.Get_Linear_Solve``, that fit is interpolated at the
    held-out curve's own locations, and the squared prediction errors are
    accumulated. The candidate with the smallest accumulated error is
    returned.

    input variable:
        x : array with shape (num_fun, num_pt, dim), observation locations
        y : array with shape (num_fun, num_pt), observed values; NaN entries
            are treated as missing
        x0 : array with shape (n_1, ..., n_dim, dim), the evaluation grid;
            the code rebuilds the axes with linspace, so the grid is
            assumed to be regularly spaced -- TODO confirm with callers
        candidate_h : array whose first axis indexes the candidate bandwidths
        ker_fun : kernel identifier forwarded unchanged to the lpr helpers

    return:
        the entry of candidate_h (along axis 0) with the smallest
        cross-validation error

    raise:
        ValueError : if candidate_h is empty or every candidate produces a
            fit containing NaN
    """
    _, _, d = x.shape
    grid_shape = np.asarray(x0.shape[:-1])
    flat_x0 = x0.reshape(-1, d)
    x0_min = flat_x0.min(0)
    x0_max = flat_x0.max(0)
    bin_width = (x0_max - x0_min) / (grid_shape - 1)
    # One coordinate axis per dimension, as RegularGridInterpolator expects.
    grid = tuple(
        np.linspace(lo, hi, n)
        for lo, hi, n in zip(x0_min, x0_max, x0.shape[:-1]))

    # Bin every non-missing observation once; each held-out curve's own
    # contribution is subtracted from these totals inside the loop.
    no_nan_val = ~np.isnan(y)
    bin_data_y, bin_data_num = lpr.Bin_Data(x[no_nan_val], y[no_nan_val], x0)

    sse = np.zeros(candidate_h.shape[0])
    for i, h in enumerate(candidate_h):
        r = lpr.Get_Range(bin_width, h, ker_fun)
        delta_x = lpr.Get_Delta_x(bin_width, r)
        weight = lpr.Get_Weight(delta_x, h, ker_fun)
        big_x = np.hstack((np.ones((delta_x.shape[0], 1)), delta_x))
        for curve_x, curve_y, curve_ok in zip(x, y, no_nan_val):
            fun_biny, fun_binx = lpr.Bin_Data(
                curve_x[curve_ok], curve_y[curve_ok], x0)
            biny = bin_data_y - fun_biny
            binx = bin_data_num - fun_binx
            ext_biny, ext_binx = lpr.Extend_Bin_Data([biny, binx], r)
            train_y = lpr.Get_Linear_Solve(big_x.T, weight, ext_binx, ext_biny,
                                           r)
            if np.isnan(train_y).any():
                # The fit is undefined somewhere on the grid for this
                # bandwidth: disqualify the candidate and move on.
                sse[i] = np.nan
                break
            # bounds_error=False makes out-of-grid points evaluate to NaN;
            # nansum then skips them together with missing observations.
            interp_fun = sp.interpolate.RegularGridInterpolator(
                grid, train_y.reshape(x0.shape[:-1]), bounds_error=False)
            sse[i] += np.nansum((curve_y - interp_fun(curve_x))**2)

    if np.isnan(sse).all():
        raise ValueError(
            'Cv_Leave_One_Curve: no usable bandwidth (candidate_h is empty '
            'or every candidate produced a fit containing NaN)')
    return candidate_h.take(np.nanargmin(sse), 0)
Пример #2
0
 def __CV_Cov_Leave_One_Out(self, x, y, x0, candidate_h, ker_fun):
     """
     Pick a bandwidth for the product (covariance-type) surface by
     leave-one-curve-out cross-validation.

     Every pair of observations (j, k) within a curve contributes the
     product y[j] * y[k] at the pair of grid cells nearest to
     (x[j], x[k]). For each candidate bandwidth, each curve is removed in
     turn from the binned totals, a surface is fitted on the doubled grid
     through ``lpr.Get_Linear_Solve``, the surface is interpolated at the
     held-out curve's own location pairs, and the squared errors are
     accumulated.

     input variable:
         x : array with shape (num_fun, num_pt, d), observation locations
         y : array with shape (num_fun, num_pt), observed values; pairs
             whose product is NaN are treated as missing
         x0 : array whose last axis has length d, the evaluation grid
         candidate_h : array whose first axis indexes the candidates
         ker_fun : kernel identifier forwarded unchanged to lpr helpers

     return:
         list [h_opt, xx_p, yy] where h_opt is the selected entry of
         candidate_h, xx_p holds the flattened pair-cell index
         (cell_j * n_grid + cell_k) of every non-missing pair and yy the
         matching products

     raise:
         ValueError : if candidate_h is empty or every candidate produces
             a fit containing NaN
     """
     grid_shape, d = np.asarray(x0.shape[:-1]), x0.shape[-1]
     # NOTE(review): n_grid comes from self.__grid_shape while the strides
     # below use the shape of x0 -- presumably identical; confirm.
     n_grid = np.prod(self.__grid_shape)
     n_pair_cells = n_grid**2

     # Nearest grid cell of every observation, as a C-order flat index.
     x_displacement = np.rint((x - x0.reshape(-1, d).min(axis=0)) /
                              self.__bin_width).astype(np.int32)
     x_p = np.sum(x_displacement *
                  np.append(grid_shape[::-1].cumprod()[-2::-1], 1),
                  axis=2)
     # All within-curve pairs (j, k): j varies slowest, matching the
     # 'ijk' layout of the einsum products below.
     xx_p = (x_p.repeat(x_p.shape[1], 1) * n_grid +
             np.tile(x_p, x_p.shape[1])).reshape(-1)
     yy = np.einsum('ij,ik->ijk', y, y).reshape(-1)

     # Per-curve views, taken before the missing pairs are dropped.
     test_xx_p = xx_p.reshape(self.__num_fun, -1)
     test_yy = yy.reshape(self.__num_fun, -1)
     non_nan_value = ~np.isnan(yy)
     non_nan_by_fun = non_nan_value.reshape(self.__num_fun, -1)

     xx_p = np.compress(non_nan_value, xx_p)
     yy = np.compress(non_nan_value, yy)
     tot_binx = np.bincount(xx_p, minlength=n_pair_cells)
     tot_biny = np.bincount(xx_p, yy, minlength=n_pair_cells)

     sse = np.zeros(candidate_h.shape[0])
     for i, h in enumerate(candidate_h):
         r = lpr.Get_Range(self.__bin_width, h, ker_fun)
         delta_x = lpr.Get_Delta_x(self.__bin_width, r)
         weight = lpr.Get_Weight(delta_x, h, ker_fun)
         big_x = np.hstack((np.ones((delta_x.shape[0], 1)), delta_x))
         for test_funi, fun_idx, fun_prod, fun_ok in zip(
                 x, test_xx_p, test_yy, non_nan_by_fun):
             fun_xx = np.compress(fun_ok, fun_idx)
             fun_yy = np.compress(fun_ok, fun_prod)
             # Remove the held-out curve's contribution from the totals.
             binx = tot_binx - np.bincount(fun_xx, minlength=n_pair_cells)
             biny = tot_biny - np.bincount(fun_xx, fun_yy,
                                           minlength=n_pair_cells)
             ext_biny, ext_binx = lpr.Extend_Bin_Data([
                 biny.reshape(n_grid, n_grid),
                 binx.reshape(n_grid, n_grid)
             ], r)
             train_yy = lpr.Get_Linear_Solve(big_x.T, weight, ext_binx,
                                             ext_biny, r)
             if np.isnan(train_yy).any():
                 # The fit is undefined somewhere for this bandwidth:
                 # disqualify the candidate and move on.
                 sse[i] = np.nan
                 break
             # bounds_error=False makes out-of-grid points evaluate to
             # NaN; nansum then skips them along with missing pairs.
             interp_fun = sp.interpolate.RegularGridInterpolator(
                 self.__grid * 2,
                 train_yy.reshape(x0.shape[:-1] * 2),
                 bounds_error=False)
             # Rows are (x[j], x[k]) for every pair, j varying slowest.
             pair_points = np.hstack(
                 (test_funi.repeat(self.__num_pt, 0),
                  np.tile(test_funi.reshape(-1),
                          self.__num_pt).reshape(-1, self.__d)))
             sse[i] += np.nansum((fun_prod - interp_fun(pair_points))**2)

     if np.isnan(sse).all():
         raise ValueError(
             'CV_Cov_Leave_One_Out: no usable bandwidth (candidate_h is '
             'empty or every candidate produced a fit containing NaN)')
     h_opt = candidate_h.take(np.nanargmin(sse), 0)
     return ([h_opt, xx_p, yy])